Skip to content

Commit 0357b6c

Browse files
committed
feat: Add generate and streaming-input feature toggles.
That way, a bunch of code in `gix` doesn't have to be compiled if writing packs isn't required.
1 parent 4971a48 commit 0357b6c

File tree

12 files changed

+186
-165
lines changed

12 files changed

+186
-165
lines changed

gix-pack/Cargo.toml

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,11 @@ autotests = false
1414
doctest = false
1515

1616
[features]
17-
17+
default = ["generate", "streaming-input"]
18+
## Generate new packs from a set of objects.
19+
generate = ["dep:gix-traverse", "dep:gix-diff"]
20+
## Receive a pack as a data stream and resolve it.
21+
streaming-input = []
1822
## Provide a fixed-size allocation-free LRU cache for packs. It's useful if caching is desired while keeping the memory footprint
1923
## for the LRU-cache itself low.
2024
pack-cache-lru-static = ["dep:uluru"]
@@ -25,22 +29,26 @@ object-cache-dynamic = ["dep:clru"]
2529
## Data structures implement `serde::Serialize` and `serde::Deserialize`.
2630
serde = ["dep:serde", "gix-object/serde"]
2731
## Make it possible to compile to the `wasm32-unknown-unknown` target.
28-
wasm = ["gix-diff/wasm"]
32+
wasm = ["gix-diff?/wasm"]
2933

3034
[dependencies]
3135
gix-features = { version = "^0.33.0", path = "../gix-features", features = ["crc32", "rustsha1", "progress", "zlib"] }
3236
gix-path = { version = "^0.9.0", path = "../gix-path" }
3337
gix-hash = { version = "^0.12.0", path = "../gix-hash" }
3438
gix-chunk = { version = "^0.4.4", path = "../gix-chunk" }
3539
gix-object = { version = "^0.35.0", path = "../gix-object" }
36-
gix-traverse = { version = "^0.31.0", path = "../gix-traverse" }
37-
gix-diff = { version = "^0.34.0", path = "../gix-diff" }
3840
gix-hashtable = { version = "^0.3.0", path = "../gix-hashtable" }
3941

42+
# for generating packs (only pulled in by the `generate` feature)
43+
gix-traverse = { version = "^0.31.0", path = "../gix-traverse", optional = true }
44+
gix-diff = { version = "^0.34.0", path = "../gix-diff", default-features = false, optional = true }
45+
4046
memmap2 = "0.7.0"
4147
smallvec = "1.3.0"
4248
parking_lot = { version = "0.12.0", default-features = false }
4349
thiserror = "1.0.26"
50+
51+
# for caching
4452
uluru = { version = "3.0.0", optional = true }
4553
clru = { version = "0.6.1", optional = true }
4654

gix-pack/src/bundle/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ pub mod init;
33

44
mod find;
55
///
6-
#[cfg(not(feature = "wasm"))]
6+
#[cfg(all(not(feature = "wasm"), feature = "streaming-input"))]
77
pub mod write;
88

99
///

gix-pack/src/data/input/entries_to_bytes.rs

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -73,12 +73,11 @@ where
7373
}
7474
self.num_entries += 1;
7575
entry.header.write_to(entry.decompressed_size, &mut self.output)?;
76-
std::io::copy(
77-
&mut entry
76+
self.output.write_all(
77+
entry
7878
.compressed
7979
.as_deref()
8080
.expect("caller must configure generator to keep compressed bytes"),
81-
&mut self.output,
8281
)?;
8382
Ok(entry)
8483
}

gix-pack/src/data/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,9 +37,11 @@ pub mod init {
3737
pub mod entry;
3838

3939
///
40+
#[cfg(feature = "streaming-input")]
4041
pub mod input;
4142

4243
/// Utilities to encode pack data entries and write them to a `Write` implementation to resemble a pack data file.
44+
#[cfg(feature = "generate")]
4345
pub mod output;
4446

4547
/// A slice into a pack file denoting a pack entry.

gix-pack/src/index/encode.rs

Lines changed: 158 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,158 @@
1+
use std::cmp::Ordering;
2+
3+
pub(crate) const LARGE_OFFSET_THRESHOLD: u64 = 0x7fff_ffff;
4+
pub(crate) const HIGH_BIT: u32 = 0x8000_0000;
5+
6+
/// Build the 256-slot fan-out table from the first bytes yielded by `iter`.
///
/// `iter` yields one byte per entry and is expected to be in ascending order. For such input,
/// slot `b` of the returned array holds the number of entries whose byte is `<= b`, so the last
/// populated slots equal the total entry count.
///
/// The values are returned as native `u32`s; no byte-order conversion happens here.
///
/// # Panics
///
/// Hits `unreachable!` when the current entry's byte is smaller than the slot being filled,
/// which can only happen if the input is not sorted.
///
/// Note that the entry count is narrowed with `as u32`, which silently truncates; nothing in this
/// function checks that the count fits.
pub(crate) fn fanout(iter: &mut dyn ExactSizeIterator<Item = u8>) -> [u32; 256] {
7+
let mut fan_out = [0u32; 256];
8+
// Capture the length before `enumerate()` takes over the iterator.
let entries_len = iter.len() as u32;
9+
let mut iter = iter.enumerate();
10+
// The first entry that has not yet been accounted for in the table, with its index.
let mut idx_and_entry = iter.next();
11+
// Count of entries already known to sort at or before the slot being filled.
let mut upper_bound = 0;
12+
13+
for (offset_be, byte) in fan_out.iter_mut().zip(0u8..=255) {
14+
*offset_be = match idx_and_entry.as_ref() {
15+
Some((_idx, first_byte)) => match first_byte.cmp(&byte) {
16+
Ordering::Less => unreachable!("ids should be ordered, and we make sure to keep ahead with them"),
17+
// No entry starts with `byte`: repeat the count reached so far.
Ordering::Greater => upper_bound,
18+
Ordering::Equal => {
19+
if byte == 255 {
20+
// Last slot: everything that remains belongs here, no need to scan further.
entries_len
21+
} else {
22+
// Skip past the run of entries sharing `byte`; the index of the next
// different entry is the number of entries consumed so far.
idx_and_entry = iter.find(|(_, first_byte)| *first_byte != byte);
23+
upper_bound = idx_and_entry.as_ref().map_or(entries_len, |(idx, _)| *idx as u32);
24+
upper_bound
25+
}
26+
}
27+
},
28+
// All entries consumed: every remaining slot holds the total count.
None => entries_len,
29+
};
30+
}
31+
32+
fan_out
33+
}
34+
35+
#[cfg(feature = "streaming-input")]
36+
mod function {
37+
use gix_features::{
38+
hash,
39+
progress::{self, DynNestedProgress},
40+
};
41+
use std::io;
42+
43+
use super::{fanout, HIGH_BIT, LARGE_OFFSET_THRESHOLD};
44+
45+
use crate::index::V2_SIGNATURE;
46+
47+
/// A wrapper that keeps a running total of the bytes written through it.
struct Count<W> {
48+
/// Sum of the byte counts reported by `inner` for each successful write.
bytes: u64,
49+
/// The wrapped value that writes are forwarded to.
inner: W,
50+
}
51+
52+
impl<W> Count<W> {
53+
/// Wrap `inner`, starting with a byte count of zero.
fn new(inner: W) -> Self {
54+
Count { bytes: 0, inner }
55+
}
56+
}
57+
58+
/// Forwards all writes to `inner` while counting the bytes it accepted.
impl<W> io::Write for Count<W>
59+
where
60+
W: io::Write,
61+
{
62+
/// Write `buf` to `inner` and add the number of bytes it accepted to `bytes`.
///
/// A failed write returns early via `?` and leaves the counter untouched.
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
63+
let written = self.inner.write(buf)?;
64+
// Count what was actually accepted, which may be less than `buf.len()`.
self.bytes += written as u64;
65+
Ok(written)
66+
}
67+
68+
/// Flush `inner`; the byte counter is not affected.
fn flush(&mut self) -> io::Result<()> {
69+
self.inner.flush()
70+
}
71+
}
72+
73+
/// Serialize a version 2 index for the given entries into `out` and return the index's own hash.
///
/// Data is written in this order: `V2_SIGNATURE`, the version as big-endian `u32`, the 256-slot
/// fan-out table, all object ids, all CRC32 values, all 32-bit offsets, the table of 64-bit
/// offsets, `pack_hash`, and finally the digest of everything written before it.
///
/// * `out` - destination; it is wrapped for hashing, buffering and byte counting.
/// * `entries_sorted_by_oid` - entries to write; as the name says they are expected to be sorted
///   by object id, which `fanout` relies on.
/// * `pack_hash` - written verbatim right before the trailing index hash.
/// * `kind` - must be `Version::V2`.
/// * `progress` - initialized with 4 steps and incremented after each of the four table phases.
///
/// # Errors
///
/// Any I/O error from writing to or flushing `out` is returned.
///
/// # Panics
///
/// Panics if `kind` is not `V2`, if there are more than `u32::MAX` entries, or if the number of
/// offsets needing 64 bits reaches `LARGE_OFFSET_THRESHOLD`.
pub(crate) fn write_to(
74+
out: &mut dyn io::Write,
75+
entries_sorted_by_oid: Vec<crate::cache::delta::Item<crate::index::write::TreeEntry>>,
76+
pack_hash: &gix_hash::ObjectId,
77+
kind: crate::index::Version,
78+
progress: &mut dyn DynNestedProgress,
79+
) -> io::Result<gix_hash::ObjectId> {
80+
use io::Write;
81+
assert_eq!(kind, crate::index::Version::V2, "Can only write V2 packs right now");
82+
// This bound is what makes the `as u32` casts on entry counts and indices below lossless.
assert!(
83+
entries_sorted_by_oid.len() <= u32::MAX as usize,
84+
"a pack cannot have more than u32::MAX objects"
85+
);
86+
87+
// Write header
88+
// Layering, outermost first: byte counter -> 32 KiB buffer -> hashing writer -> caller's `out`.
let mut out = Count::new(std::io::BufWriter::with_capacity(
89+
8 * 4096,
90+
hash::Write::new(out, kind.hash()),
91+
));
92+
out.write_all(V2_SIGNATURE)?;
93+
out.write_all(&(kind as u32).to_be_bytes())?;
94+
95+
progress.init(Some(4), progress::steps());
96+
let start = std::time::Instant::now();
97+
let _info = progress.add_child_with_id("writing fan-out table".into(), gix_features::progress::UNKNOWN);
98+
let fan_out = fanout(&mut entries_sorted_by_oid.iter().map(|e| e.data.id.first_byte()));
99+
100+
for value in fan_out.iter() {
101+
out.write_all(&value.to_be_bytes())?;
102+
}
103+
104+
progress.inc();
105+
let _info = progress.add_child_with_id("writing ids".into(), gix_features::progress::UNKNOWN);
106+
for entry in &entries_sorted_by_oid {
107+
out.write_all(entry.data.id.as_slice())?;
108+
}
109+
110+
progress.inc();
111+
let _info = progress.add_child_with_id("writing crc32".into(), gix_features::progress::UNKNOWN);
112+
for entry in &entries_sorted_by_oid {
113+
out.write_all(&entry.data.crc32.to_be_bytes())?;
114+
}
115+
116+
progress.inc();
117+
let _info = progress.add_child_with_id("writing offsets".into(), gix_features::progress::UNKNOWN);
118+
{
119+
// Offsets above the threshold are collected here and written as a second table
// after the 32-bit one.
let mut offsets64 = Vec::<u64>::new();
120+
for entry in &entries_sorted_by_oid {
121+
let offset: u32 = if entry.offset > LARGE_OFFSET_THRESHOLD {
122+
assert!(
123+
offsets64.len() < LARGE_OFFSET_THRESHOLD as usize,
124+
"Encoding breakdown - way too many 64bit offsets"
125+
);
126+
offsets64.push(entry.offset);
127+
// Store the position within the 64-bit table, flagged by the high bit.
((offsets64.len() - 1) as u32) | HIGH_BIT
128+
} else {
129+
// Fits: the branch condition guarantees the value is at most 0x7fff_ffff.
entry.offset as u32
130+
};
131+
out.write_all(&offset.to_be_bytes())?;
132+
}
133+
for value in offsets64 {
134+
out.write_all(&value.to_be_bytes())?;
135+
}
136+
}
137+
138+
out.write_all(pack_hash.as_slice())?;
139+
140+
let bytes_written_without_trailer = out.bytes;
141+
// `BufWriter::into_inner` flushes the buffer, so the hashing writer has seen every byte
// before its digest is taken.
let out = out.inner.into_inner()?;
142+
let index_hash: gix_hash::ObjectId = out.hash.digest().into();
143+
// The trailer goes to `out.inner` directly - presumably the caller's writer underneath the
// hashing layer - so it is not part of the hash itself. NOTE(review): confirm against
// `gix_features::hash::Write`.
out.inner.write_all(index_hash.as_slice())?;
144+
out.inner.flush()?;
145+
146+
progress.inc();
147+
progress.show_throughput_with(
148+
start,
149+
// NOTE(review): the literal 20 presumably accounts for the trailing hash just written
// (SHA-1 length) - confirm if other hash kinds become possible.
(bytes_written_without_trailer + 20) as usize,
150+
progress::bytes().expect("unit always set"),
151+
progress::MessageLevel::Success,
152+
);
153+
154+
Ok(index_hash)
155+
}
156+
}
157+
#[cfg(feature = "streaming-input")]
158+
pub(crate) use function::write_to;

gix-pack/src/index/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,10 +141,12 @@ pub mod init;
141141
pub(crate) mod access;
142142
pub use access::Entry;
143143

144+
pub(crate) mod encode;
144145
///
145146
pub mod traverse;
146147
mod util;
147148
///
148149
pub mod verify;
149150
///
151+
#[cfg(feature = "streaming-input")]
150152
pub mod write;

gix-pack/src/index/util.rs

Lines changed: 1 addition & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use std::{io, time::Instant};
1+
use std::time::Instant;
22

33
use gix_features::progress::{self, Progress};
44

@@ -19,29 +19,3 @@ pub(crate) fn index_entries_sorted_by_offset_ascending(
1919
progress.show_throughput(start);
2020
v
2121
}
22-
23-
pub(crate) struct Count<W> {
24-
pub bytes: u64,
25-
pub inner: W,
26-
}
27-
28-
impl<W> Count<W> {
29-
pub fn new(inner: W) -> Self {
30-
Count { bytes: 0, inner }
31-
}
32-
}
33-
34-
impl<W> io::Write for Count<W>
35-
where
36-
W: io::Write,
37-
{
38-
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
39-
let written = self.inner.write(buf)?;
40-
self.bytes += written as u64;
41-
Ok(written)
42-
}
43-
44-
fn flush(&mut self) -> io::Result<()> {
45-
self.inner.flush()
46-
}
47-
}

0 commit comments

Comments
 (0)