Skip to content

Commit 4b74996

Browse files
committed
feat!: Make usage of decompression context explicit.
That way, the context can be reused which is more efficient than recreating it from scratch for every little delta to decompress. This leads to a performance gain of 1.3%.
1 parent b7560a2 commit 4b74996

File tree

8 files changed

+81
-29
lines changed

8 files changed

+81
-29
lines changed

gix-pack/src/bundle/find.rs

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,30 @@
1+
use gix_features::zlib;
2+
13
impl crate::Bundle {
2-
/// Find an object with the given [`ObjectId`][gix_hash::ObjectId] and place its data into `out`.
4+
/// Find an object with the given [`ObjectId`](gix_hash::ObjectId) and place its data into `out`.
5+
/// `inflate` is used to decompress objects, and will be reset before first use, but not after the last use.
36
///
4-
/// [`cache`][crate::cache::DecodeEntry] is used to accelerate the lookup.
7+
/// [`cache`](crate::cache::DecodeEntry) is used to accelerate the lookup.
58
///
69
/// **Note** that ref deltas are automatically resolved within this pack only, which makes this implementation unusable
710
/// for thin packs, which by now are expected to be resolved already.
811
pub fn find<'a>(
912
&self,
1013
id: impl AsRef<gix_hash::oid>,
1114
out: &'a mut Vec<u8>,
15+
inflate: &mut zlib::Inflate,
1216
cache: &mut impl crate::cache::DecodeEntry,
1317
) -> Result<Option<(gix_object::Data<'a>, crate::data::entry::Location)>, crate::data::decode::Error> {
1418
let idx = match self.index.lookup(id) {
1519
Some(idx) => idx,
1620
None => return Ok(None),
1721
};
18-
self.get_object_by_index(idx, out, cache).map(Some)
22+
self.get_object_by_index(idx, out, inflate, cache).map(Some)
1923
}
2024

2125
/// Special-use function to get an object given an index previously returned from
22-
/// `internal_find_pack_index`.
26+
/// [index::File::](crate::index::File::lookup()).
27+
/// `inflate` is used to decompress objects, and will be reset before first use, but not after the last use.
2328
///
2429
/// # Panics
2530
///
@@ -28,6 +33,7 @@ impl crate::Bundle {
2833
&self,
2934
idx: u32,
3035
out: &'a mut Vec<u8>,
36+
inflate: &mut zlib::Inflate,
3137
cache: &mut impl crate::cache::DecodeEntry,
3238
) -> Result<(gix_object::Data<'a>, crate::data::entry::Location), crate::data::decode::Error> {
3339
let ofs = self.index.pack_offset_at_index(idx);
@@ -37,6 +43,7 @@ impl crate::Bundle {
3743
.decode_entry(
3844
pack_entry,
3945
out,
46+
inflate,
4047
|id, _out| {
4148
self.index.lookup(id).map(|idx| {
4249
crate::data::decode::entry::ResolvedBase::InPack(

gix-pack/src/data/file/decode/entry.rs

Lines changed: 28 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -75,22 +75,28 @@ impl Outcome {
7575
/// Decompression of objects
7676
impl File {
7777
/// Decompress the given `entry` into `out` and return the amount of bytes read from the pack data.
78+
/// Note that `inflate` is not reset after usage, but will be reset before using it.
7879
///
7980
/// _Note_ that this method does not resolve deltified objects, but merely decompresses their content
8081
/// `out` is expected to be large enough to hold `entry.size` bytes.
8182
///
8283
/// # Panics
8384
///
8485
/// If `out` isn't large enough to hold the decompressed `entry`
85-
pub fn decompress_entry(&self, entry: &data::Entry, out: &mut [u8]) -> Result<usize, Error> {
86+
pub fn decompress_entry(
87+
&self,
88+
entry: &data::Entry,
89+
inflate: &mut zlib::Inflate,
90+
out: &mut [u8],
91+
) -> Result<usize, Error> {
8692
assert!(
8793
out.len() as u64 >= entry.decompressed_size,
8894
"output buffer isn't large enough to hold decompressed result, want {}, have {}",
8995
entry.decompressed_size,
9096
out.len()
9197
);
9298

93-
self.decompress_entry_from_data_offset(entry.data_offset, out)
99+
self.decompress_entry_from_data_offset(entry.data_offset, inflate, out)
94100
.map_err(Into::into)
95101
}
96102

@@ -121,12 +127,14 @@ impl File {
121127
pub(crate) fn decompress_entry_from_data_offset(
122128
&self,
123129
data_offset: data::Offset,
130+
inflate: &mut zlib::Inflate,
124131
out: &mut [u8],
125132
) -> Result<usize, zlib::inflate::Error> {
126133
let offset: usize = data_offset.try_into().expect("offset representable by machine");
127134
assert!(offset < self.data.len(), "entry offset out of bounds");
128135

129-
zlib::Inflate::default()
136+
inflate.reset();
137+
inflate
130138
.once(&self.data[offset..], out)
131139
.map(|(_status, consumed_in, _consumed_out)| consumed_in)
132140
}
@@ -135,12 +143,14 @@ impl File {
135143
pub(crate) fn decompress_entry_from_data_offset_2(
136144
&self,
137145
data_offset: data::Offset,
146+
inflate: &mut zlib::Inflate,
138147
out: &mut [u8],
139148
) -> Result<(usize, usize), zlib::inflate::Error> {
140149
let offset: usize = data_offset.try_into().expect("offset representable by machine");
141150
assert!(offset < self.data.len(), "entry offset out of bounds");
142151

143-
zlib::Inflate::default()
152+
inflate.reset();
153+
inflate
144154
.once(&self.data[offset..], out)
145155
.map(|(_status, consumed_in, consumed_out)| (consumed_in, consumed_out))
146156
}
@@ -149,6 +159,7 @@ impl File {
149159
/// space to hold the result object.
150160
///
151161
/// The `entry` determines which object to decode, and is commonly obtained with the help of a pack index file or through pack iteration.
162+
/// `inflate` will be used for decompressing entries, and will not be reset after usage, but before first using it.
152163
///
153164
/// `resolve` is a function to lookup objects with the given [`ObjectId`][gix_hash::ObjectId], in case the full object id is used to refer to
154165
/// a base object, instead of an in-pack offset.
@@ -159,6 +170,7 @@ impl File {
159170
&self,
160171
entry: data::Entry,
161172
out: &mut Vec<u8>,
173+
inflate: &mut zlib::Inflate,
162174
resolve: impl Fn(&gix_hash::oid, &mut Vec<u8>) -> Option<ResolvedBase>,
163175
delta_cache: &mut impl cache::DecodeEntry,
164176
) -> Result<Outcome, Error> {
@@ -172,15 +184,16 @@ impl File {
172184
.expect("size representable by machine"),
173185
0,
174186
);
175-
self.decompress_entry(&entry, out.as_mut_slice()).map(|consumed_input| {
176-
Outcome::from_object_entry(
177-
entry.header.as_kind().expect("a non-delta entry"),
178-
&entry,
179-
consumed_input,
180-
)
181-
})
187+
self.decompress_entry(&entry, inflate, out.as_mut_slice())
188+
.map(|consumed_input| {
189+
Outcome::from_object_entry(
190+
entry.header.as_kind().expect("a non-delta entry"),
191+
&entry,
192+
consumed_input,
193+
)
194+
})
182195
}
183-
OfsDelta { .. } | RefDelta { .. } => self.resolve_deltas(entry, resolve, out, delta_cache),
196+
OfsDelta { .. } | RefDelta { .. } => self.resolve_deltas(entry, resolve, inflate, out, delta_cache),
184197
}
185198
}
186199

@@ -191,6 +204,7 @@ impl File {
191204
&self,
192205
last: data::Entry,
193206
resolve: impl Fn(&gix_hash::oid, &mut Vec<u8>) -> Option<ResolvedBase>,
207+
inflate: &mut zlib::Inflate,
194208
out: &mut Vec<u8>,
195209
cache: &mut impl cache::DecodeEntry,
196210
) -> Result<Outcome, Error> {
@@ -278,6 +292,7 @@ impl File {
278292
for (delta_idx, delta) in chain.iter_mut().rev().enumerate() {
279293
let consumed_from_data_offset = self.decompress_entry_from_data_offset(
280294
delta.data_offset,
295+
inflate,
281296
&mut instructions[..delta.decompressed_size],
282297
)?;
283298
let is_last_delta_to_be_applied = delta_idx + 1 == chain_len;
@@ -338,7 +353,7 @@ impl File {
338353
let base_entry = cursor;
339354
debug_assert!(!base_entry.header.is_delta());
340355
object_kind = base_entry.header.as_kind();
341-
self.decompress_entry_from_data_offset(base_entry.data_offset, out)?;
356+
self.decompress_entry_from_data_offset(base_entry.data_offset, inflate, out)?;
342357
}
343358

344359
(first_buffer_size, second_buffer_end)

gix-pack/src/data/file/decode/header.rs

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ use crate::{
22
data,
33
data::{delta, file::decode::Error, File},
44
};
5+
use gix_features::zlib;
56

67
/// A return value of a resolve function, which given an [`ObjectId`][gix_hash::ObjectId] determines where an object can be found.
78
#[derive(Debug, PartialEq, Eq, Hash, Ord, PartialOrd, Clone)]
@@ -37,12 +38,14 @@ impl File {
3738
/// Resolve the object header information starting at `entry`, following the chain of entries as needed.
3839
///
3940
/// The `entry` determines which object to decode, and is commonly obtained with the help of a pack index file or through pack iteration.
41+
/// `inflate` will be used for (partially) decompressing entries, and will be reset before first use, but not after the last use.
4042
///
4143
/// `resolve` is a function to lookup objects with the given [`ObjectId`][gix_hash::ObjectId], in case the full object id
4244
/// is used to refer to a base object, instead of an in-pack offset.
4345
pub fn decode_header(
4446
&self,
4547
mut entry: data::Entry,
48+
inflate: &mut zlib::Inflate,
4649
resolve: impl Fn(&gix_hash::oid) -> Option<ResolvedBase>,
4750
) -> Result<Outcome, Error> {
4851
use crate::data::entry::Header::*;
@@ -60,14 +63,14 @@ impl File {
6063
OfsDelta { base_distance } => {
6164
num_deltas += 1;
6265
if first_delta_decompressed_size.is_none() {
63-
first_delta_decompressed_size = Some(self.decode_delta_object_size(&entry)?);
66+
first_delta_decompressed_size = Some(self.decode_delta_object_size(inflate, &entry)?);
6467
}
6568
entry = self.entry(entry.base_pack_offset(base_distance))
6669
}
6770
RefDelta { base_id } => {
6871
num_deltas += 1;
6972
if first_delta_decompressed_size.is_none() {
70-
first_delta_decompressed_size = Some(self.decode_delta_object_size(&entry)?);
73+
first_delta_decompressed_size = Some(self.decode_delta_object_size(inflate, &entry)?);
7174
}
7275
match resolve(base_id.as_ref()) {
7376
Some(ResolvedBase::InPack(base_entry)) => entry = base_entry,
@@ -89,9 +92,11 @@ impl File {
8992
}
9093

9194
#[inline]
92-
fn decode_delta_object_size(&self, entry: &data::Entry) -> Result<u64, Error> {
95+
fn decode_delta_object_size(&self, inflate: &mut zlib::Inflate, entry: &data::Entry) -> Result<u64, Error> {
9396
let mut buf = [0_u8; 32];
94-
let used = self.decompress_entry_from_data_offset_2(entry.data_offset, &mut buf)?.1;
97+
let used = self
98+
.decompress_entry_from_data_offset_2(entry.data_offset, inflate, &mut buf)?
99+
.1;
95100
let buf = &buf[..used];
96101
let (_base_size, offset) = delta::decode_header_size(buf);
97102
let (result_size, _offset) = delta::decode_header_size(&buf[offset..]);

gix-pack/src/index/traverse/mod.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ use std::sync::atomic::AtomicBool;
33
use gix_features::{
44
parallel,
55
progress::{Progress, RawProgress},
6+
zlib,
67
};
78

89
use crate::index;
@@ -155,6 +156,7 @@ impl index::File {
155156
pack: &crate::data::File,
156157
cache: &mut C,
157158
buf: &mut Vec<u8>,
159+
inflate: &mut zlib::Inflate,
158160
progress: &mut dyn RawProgress,
159161
index_entry: &index::Entry,
160162
processor: &mut impl FnMut(gix_object::Kind, &[u8], &index::Entry, &dyn RawProgress) -> Result<(), E>,
@@ -169,6 +171,7 @@ impl index::File {
169171
.decode_entry(
170172
pack_entry,
171173
buf,
174+
inflate,
172175
|id, _| {
173176
self.lookup(id).map(|index| {
174177
crate::data::decode::entry::ResolvedBase::InPack(pack.entry(self.pack_offset_at_index(index)))

gix-pack/src/index/traverse/with_lookup.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ use gix_features::{
44
parallel::{self, in_parallel_if},
55
progress::{self, Progress},
66
threading::{lock, Mutable, OwnShared},
7+
zlib,
78
};
89

910
use super::{Error, Reducer};
@@ -131,6 +132,7 @@ impl index::File {
131132
(
132133
make_pack_lookup_cache(),
133134
Vec::with_capacity(2048), // decode buffer
135+
zlib::Inflate::default(),
134136
lock(&reduce_progress)
135137
.add_child_with_id(format!("thread {index}"), gix_features::progress::UNKNOWN), // per thread progress
136138
)
@@ -143,7 +145,7 @@ impl index::File {
143145
thread_limit,
144146
state_per_thread,
145147
move |entries: &[index::Entry],
146-
(cache, buf, progress)|
148+
(cache, buf, inflate, progress)|
147149
-> Result<Vec<data::decode::entry::Outcome>, Error<_>> {
148150
progress.init(
149151
Some(entries.len()),
@@ -157,6 +159,7 @@ impl index::File {
157159
pack,
158160
cache,
159161
buf,
162+
inflate,
160163
progress,
161164
index_entry,
162165
&mut processor,

gix-pack/tests/pack/bundle.rs

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
mod locate {
22
use bstr::ByteSlice;
3+
use gix_features::zlib;
34
use gix_object::Kind;
45
use gix_odb::pack;
56

@@ -8,13 +9,19 @@ mod locate {
89
fn locate<'a>(hex_id: &str, out: &'a mut Vec<u8>) -> gix_object::Data<'a> {
910
let bundle = pack::Bundle::at(fixture_path(SMALL_PACK_INDEX), gix_hash::Kind::Sha1).expect("pack and idx");
1011
bundle
11-
.find(hex_to_id(hex_id), out, &mut pack::cache::Never)
12+
.find(
13+
hex_to_id(hex_id),
14+
out,
15+
&mut zlib::Inflate::default(),
16+
&mut pack::cache::Never,
17+
)
1218
.expect("read success")
1319
.expect("id present")
1420
.0
1521
}
1622

1723
mod locate_and_verify {
24+
use gix_features::zlib;
1825
use gix_odb::pack;
1926

2027
use crate::{fixture_path, pack::PACKS_AND_INDICES};
@@ -29,7 +36,12 @@ mod locate {
2936
let mut buf = Vec::new();
3037
for entry in bundle.index.iter() {
3138
let (obj, _location) = bundle
32-
.find(entry.oid, &mut buf, &mut pack::cache::Never)?
39+
.find(
40+
entry.oid,
41+
&mut buf,
42+
&mut zlib::Inflate::default(),
43+
&mut pack::cache::Never,
44+
)?
3345
.expect("id present");
3446
obj.verify_checksum(entry.oid)?;
3547
}

gix-pack/tests/pack/data/file.rs

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -105,8 +105,14 @@ mod decode_entry {
105105
let p = pack_at(SMALL_PACK);
106106
let entry = p.entry(offset);
107107
let mut buf = Vec::new();
108-
p.decode_entry(entry, &mut buf, resolve_with_panic, &mut cache::Never)
109-
.expect("valid offset provides valid entry");
108+
p.decode_entry(
109+
entry,
110+
&mut buf,
111+
&mut Default::default(),
112+
resolve_with_panic,
113+
&mut cache::Never,
114+
)
115+
.expect("valid offset provides valid entry");
110116
buf
111117
}
112118
}
@@ -154,7 +160,7 @@ mod resolve_header {
154160

155161
let p = pack_at(SMALL_PACK);
156162
let entry = p.entry(offset);
157-
p.decode_header(entry, resolve_with_panic)
163+
p.decode_header(entry, &mut Default::default(), resolve_with_panic)
158164
.expect("valid offset provides valid entry")
159165
}
160166
}
@@ -206,7 +212,8 @@ mod decompress_entry {
206212

207213
let size = entry.decompressed_size as usize;
208214
let mut buf = vec![0; size];
209-
p.decompress_entry(&entry, &mut buf).expect("valid offset");
215+
p.decompress_entry(&entry, &mut Default::default(), &mut buf)
216+
.expect("valid offset");
210217

211218
buf.resize(entry.decompressed_size as usize, 0);
212219
buf

gix-pack/tests/pack/index.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -417,7 +417,7 @@ fn pack_lookup() -> Result<(), Box<dyn std::error::Error>> {
417417
entry.pack_offset,
418418
"index entry offset and computed pack offset must match"
419419
);
420-
pack.decompress_entry(&pack_entry, &mut buf)?;
420+
pack.decompress_entry(&pack_entry, &mut Default::default(), &mut buf)?;
421421

422422
assert_eq!(
423423
buf.len() as u64,

0 commit comments

Comments
 (0)