Skip to content

Commit 4c03fdb

Browse files
committed
feat!: add hash::bytes_with_header(), and make it 32-bit compatible.
That way it's possible to hash entire files as an object. Previously it wasn't possible to read more than u32::MAX bytes, even on a 32-bit system, even though we are streaming the data.
1 parent 7891fb1 commit 4c03fdb

File tree

1 file changed

+21
-7
lines changed

1 file changed

+21
-7
lines changed

gix-features/src/hash.rs

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ pub fn hasher(kind: gix_hash::Kind) -> Sha1 {
9696
#[cfg(all(feature = "progress", any(feature = "rustsha1", feature = "fast-sha1")))]
9797
pub fn bytes_of_file(
9898
path: &std::path::Path,
99-
num_bytes_from_start: usize,
99+
num_bytes_from_start: u64,
100100
kind: gix_hash::Kind,
101101
progress: &mut dyn crate::progress::Progress,
102102
should_interrupt: &std::sync::atomic::AtomicBool,
@@ -110,28 +110,42 @@ pub fn bytes_of_file(
110110
)
111111
}
112112

113-
/// Similar to [`bytes_of_file`], but operates on an already open file.
113+
/// Similar to [`bytes_of_file`], but operates on a stream of bytes.
114114
#[cfg(all(feature = "progress", any(feature = "rustsha1", feature = "fast-sha1")))]
115115
pub fn bytes(
116116
read: &mut dyn std::io::Read,
117-
num_bytes_from_start: usize,
117+
num_bytes_from_start: u64,
118118
kind: gix_hash::Kind,
119119
progress: &mut dyn crate::progress::Progress,
120120
should_interrupt: &std::sync::atomic::AtomicBool,
121121
) -> std::io::Result<gix_hash::ObjectId> {
122-
let mut hasher = hasher(kind);
122+
bytes_with_hasher(read, num_bytes_from_start, hasher(kind), progress, should_interrupt)
123+
}
124+
125+
/// Similar to [`bytes()`], but takes a `hasher` instead of a hash kind.
126+
#[cfg(all(feature = "progress", any(feature = "rustsha1", feature = "fast-sha1")))]
127+
pub fn bytes_with_hasher(
128+
read: &mut dyn std::io::Read,
129+
num_bytes_from_start: u64,
130+
mut hasher: Sha1,
131+
progress: &mut dyn crate::progress::Progress,
132+
should_interrupt: &std::sync::atomic::AtomicBool,
133+
) -> std::io::Result<gix_hash::ObjectId> {
123134
let start = std::time::Instant::now();
124135
// init progress before the possibility for failure, as convenience in case people want to recover
125-
progress.init(Some(num_bytes_from_start), crate::progress::bytes());
136+
progress.init(
137+
Some(num_bytes_from_start as prodash::progress::Step),
138+
crate::progress::bytes(),
139+
);
126140

127141
const BUF_SIZE: usize = u16::MAX as usize;
128142
let mut buf = [0u8; BUF_SIZE];
129143
let mut bytes_left = num_bytes_from_start;
130144

131145
while bytes_left > 0 {
132-
let out = &mut buf[..BUF_SIZE.min(bytes_left)];
146+
let out = &mut buf[..BUF_SIZE.min(bytes_left as usize)];
133147
read.read_exact(out)?;
134-
bytes_left -= out.len();
148+
bytes_left -= out.len() as u64;
135149
progress.inc_by(out.len());
136150
hasher.update(out);
137151
if should_interrupt.load(std::sync::atomic::Ordering::SeqCst) {

0 commit comments

Comments
 (0)