Skip to content

Document and restrict some unsafe code in gix-pack #1137

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Nov 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 12 additions & 14 deletions gix-pack/src/cache/delta/traverse/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use gix_features::{
};

use crate::{
cache::delta::{traverse::util::ItemSliceSend, Item, Tree},
cache::delta::{traverse::util::ItemSliceSync, Item, Tree},
data::EntryRange,
};

Expand Down Expand Up @@ -133,25 +133,23 @@ where
let object_progress = OwnShared::new(Mutable::new(object_progress));

let start = std::time::Instant::now();
let child_items = ItemSliceSync::new(&mut self.child_items);
let child_items = &child_items;
in_parallel_with_slice(
&mut self.root_items,
thread_limit,
{
let child_items = ItemSliceSend::new(&mut self.child_items);
{
let object_progress = object_progress.clone();
move |thread_index| {
let _ = &child_items;
resolve::State {
delta_bytes: Vec::<u8>::with_capacity(4096),
fully_resolved_delta_bytes: Vec::<u8>::with_capacity(4096),
progress: Box::new(
threading::lock(&object_progress).add_child(format!("thread {thread_index}")),
),
resolve: resolve.clone(),
modify_base: inspect_object.clone(),
child_items: child_items.clone(),
}
move |thread_index| resolve::State {
delta_bytes: Vec::<u8>::with_capacity(4096),
fully_resolved_delta_bytes: Vec::<u8>::with_capacity(4096),
progress: Box::new(
threading::lock(&object_progress).add_child(format!("thread {thread_index}")),
),
resolve: resolve.clone(),
modify_base: inspect_object.clone(),
child_items,
}
}
},
Expand Down
78 changes: 63 additions & 15 deletions gix-pack/src/cache/delta/traverse/resolve.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,30 +7,81 @@ use gix_features::{progress::Progress, threading, zlib};

use crate::{
cache::delta::{
traverse::{
util::{ItemSliceSend, Node},
Context, Error,
},
traverse::{util::ItemSliceSync, Context, Error},
Item,
},
data,
data::EntryRange,
};

mod node {
use crate::cache::delta::{traverse::util::ItemSliceSync, Item};

/// An item returned by `iter_root_chunks`, allowing access to the `data` stored alongside nodes in a [`Tree`].
pub(crate) struct Node<'a, T: Send> {
item: &'a mut Item<T>,
child_items: &'a ItemSliceSync<'a, Item<T>>,
}

impl<'a, T: Send> Node<'a, T> {
/// SAFETY: The child_items must be unique among between users of the `ItemSliceSync`.
#[allow(unsafe_code)]
pub(crate) unsafe fn new(item: &'a mut Item<T>, child_items: &'a ItemSliceSync<'a, Item<T>>) -> Self {
Node { item, child_items }
}
}

impl<'a, T: Send> Node<'a, T> {
/// Returns the offset into the pack at which the `Node`s data is located.
pub fn offset(&self) -> u64 {
self.item.offset
}

/// Returns the slice into the data pack at which the pack entry is located.
pub fn entry_slice(&self) -> crate::data::EntryRange {
self.item.offset..self.item.next_offset
}

/// Returns the node data associated with this node.
pub fn data(&mut self) -> &mut T {
&mut self.item.data
}

/// Returns true if this node has children, e.g. is not a leaf in the tree.
pub fn has_children(&self) -> bool {
!self.item.children.is_empty()
}

/// Transform this `Node` into an iterator over its children.
///
/// Children are `Node`s referring to pack entries whose base object is this pack entry.
pub fn into_child_iter(self) -> impl Iterator<Item = Node<'a, T>> + 'a {
let children = self.child_items;
// SAFETY: The index is a valid index into the children array.
// SAFETY: The resulting mutable pointer cannot be yielded by any other node.
#[allow(unsafe_code)]
self.item.children.iter().map(move |&index| Node {
item: unsafe { children.get_mut(index as usize) },
child_items: children,
})
}
}
}

pub(crate) struct State<'items, F, MBFN, T: Send> {
pub delta_bytes: Vec<u8>,
pub fully_resolved_delta_bytes: Vec<u8>,
pub progress: Box<dyn Progress>,
pub resolve: F,
pub modify_base: MBFN,
pub child_items: ItemSliceSend<'items, Item<T>>,
pub child_items: &'items ItemSliceSync<'items, Item<T>>,
}

#[allow(clippy::too_many_arguments)]
pub(crate) fn deltas<T, F, MBFN, E, R>(
objects: gix_features::progress::StepShared,
size: gix_features::progress::StepShared,
node: &mut Item<T>,
item: &mut Item<T>,
State {
delta_bytes,
fully_resolved_delta_bytes,
Expand Down Expand Up @@ -67,13 +118,10 @@ where
// each node is a base, and its children always start out as deltas which become a base after applying them.
// These will be pushed onto our stack until all are processed
let root_level = 0;
let mut nodes: Vec<_> = vec![(
root_level,
Node {
item: node,
child_items: child_items.clone(),
},
)];
// SAFETY: The child items are unique
#[allow(unsafe_code)]
let root_node = unsafe { node::Node::new(item, child_items) };
let mut nodes: Vec<_> = vec![(root_level, root_node)];
while let Some((level, mut base)) = nodes.pop() {
if should_interrupt.load(Ordering::Relaxed) {
return Err(Error::Interrupted);
Expand Down Expand Up @@ -186,13 +234,13 @@ where
/// system. Since this thread will take a controlling function, we may spawn one more than that. In threaded mode, we will finish
/// all remaining work.
#[allow(clippy::too_many_arguments)]
pub(crate) fn deltas_mt<T, F, MBFN, E, R>(
fn deltas_mt<T, F, MBFN, E, R>(
mut threads_to_create: isize,
decompressed_bytes_by_pack_offset: BTreeMap<u64, (data::Entry, u64, Vec<u8>)>,
objects: gix_features::progress::StepShared,
size: gix_features::progress::StepShared,
progress: &dyn Progress,
nodes: Vec<(u16, Node<'_, T>)>,
nodes: Vec<(u16, node::Node<'_, T>)>,
resolve: F,
resolve_data: &R,
modify_base: MBFN,
Expand Down
78 changes: 14 additions & 64 deletions gix-pack/src/cache/delta/traverse/util.rs
Original file line number Diff line number Diff line change
@@ -1,86 +1,36 @@
use std::marker::PhantomData;

use crate::cache::delta::Item;

pub(crate) struct ItemSliceSend<'a, T>
pub(crate) struct ItemSliceSync<'a, T>
where
T: Send,
{
items: *mut T,
phantom: PhantomData<&'a T>,
}

impl<'a, T> ItemSliceSend<'a, T>
impl<'a, T> ItemSliceSync<'a, T>
where
T: Send,
{
pub fn new(items: &'a mut [T]) -> Self {
ItemSliceSend {
ItemSliceSync {
items: items.as_mut_ptr(),
phantom: PhantomData,
}
}
}

/// SAFETY: This would be unsafe if this would ever be abused, but it's used internally and only in a way that assure that the pointers
/// don't violate aliasing rules.
impl<T> Clone for ItemSliceSend<'_, T>
where
T: Send,
{
fn clone(&self) -> Self {
ItemSliceSend {
items: self.items,
phantom: self.phantom,
}
/// SAFETY: The index must point into the slice and must not be reused concurrently.
#[allow(unsafe_code)]
pub unsafe fn get_mut(&self, index: usize) -> &'a mut T {
// SAFETY: The children array is alive by the 'a lifetime.
unsafe { &mut *self.items.add(index) }
}
}

// SAFETY: T is `Send`, and we only ever access one T at a time. And, ptrs need that assurance, I wonder if it's always right.
// SAFETY: T is `Send`, and we only use the pointer for creating new pointers.
#[allow(unsafe_code)]
unsafe impl<T> Send for ItemSliceSend<'_, T> where T: Send {}

/// An item returned by `iter_root_chunks`, allowing access to the `data` stored alongside nodes in a [`Tree`].
pub(crate) struct Node<'a, T: Send> {
pub item: &'a mut Item<T>,
pub child_items: ItemSliceSend<'a, Item<T>>,
}

impl<'a, T: Send> Node<'a, T> {
/// Returns the offset into the pack at which the `Node`s data is located.
pub fn offset(&self) -> u64 {
self.item.offset
}

/// Returns the slice into the data pack at which the pack entry is located.
pub fn entry_slice(&self) -> crate::data::EntryRange {
self.item.offset..self.item.next_offset
}

/// Returns the node data associated with this node.
pub fn data(&mut self) -> &mut T {
&mut self.item.data
}

/// Returns true if this node has children, e.g. is not a leaf in the tree.
pub fn has_children(&self) -> bool {
!self.item.children.is_empty()
}

/// Transform this `Node` into an iterator over its children.
///
/// Children are `Node`s referring to pack entries whose base object is this pack entry.
pub fn into_child_iter(self) -> impl Iterator<Item = Node<'a, T>> + 'a {
let children = self.child_items;
self.item.children.iter().map(move |&index| {
// SAFETY: The children array is alive by the 'a lifetime.
// SAFETY: The index is a valid index into the children array.
// SAFETY: The resulting mutable pointer cannot be yielded by any other node.
#[allow(unsafe_code)]
Node {
item: unsafe { &mut *children.items.add(index as usize) },
child_items: children.clone(),
}
})
}
}
unsafe impl<T> Send for ItemSliceSync<'_, T> where T: Send {}
// SAFETY: T is `Send`, and as long as the user follows the contract of
// `get_mut()`, we only ever access one T at a time.
#[allow(unsafe_code)]
unsafe impl<T> Sync for ItemSliceSync<'_, T> where T: Send {}