Skip to content

Commit 165f366

Browse files
committed
optimize out stack alignment for sizes < 32
1 parent c6ca81a commit 165f366

File tree

1 file changed

+12
-5
lines changed

1 file changed

+12
-5
lines changed

src/libcore/mem.rs

Lines changed: 12 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -453,22 +453,26 @@ pub fn swap<T>(x: &mut T, y: &mut T) {
453453
// #[repr(simd)], even if we don't actually use this struct directly.
454454
#[repr(simd)]
455455
struct Block(u64, u64, u64, u64);
456-
let block_size = size_of::<Block>();
456+
struct UnalignedBlock(u64, u64, u64, u64);
457457

458-
// Create some uninitialized memory as scratch space
459-
let mut t: Block = uninitialized();
458+
let block_size = size_of::<Block>();
460459

461-
// Get raw pointers to the bytes of x, y & t for easier manipulation
460+
// Get raw pointers to the bytes of x & y for easier manipulation
462461
let x = x as *mut T as *mut u8;
463462
let y = y as *mut T as *mut u8;
464-
let t = &mut t as *mut _ as *mut u8;
465463

466464
// Loop through x & y, copying them `Block` at a time
467465
// The optimizer should unroll the loop fully for most types
468466
// N.B. We can't use a for loop as the `range` impl calls `mem::swap` recursively
469467
let len = size_of::<T>() as isize;
470468
let mut i = 0;
471469
while i + block_size as isize <= len {
470+
// Create some uninitialized memory as scratch space
471+
// Moving the declaration of `t` here avoids aligning the stack when
472+
// this loop is unused
473+
let mut t: Block = uninitialized();
474+
let t = &mut t as *mut _ as *mut u8;
475+
472476
// Swap a block of bytes of x & y, using t as a temporary buffer
473477
// This should be optimized into efficient SIMD operations where available
474478
ptr::copy_nonoverlapping(x.offset(i), t, block_size);
@@ -478,6 +482,9 @@ pub fn swap<T>(x: &mut T, y: &mut T) {
478482
}
479483
if i < len {
480484
// Swap any remaining bytes
485+
let mut t: UnalignedBlock = uninitialized();
486+
let t = &mut t as *mut _ as *mut u8;
487+
481488
let rem = (len - i) as usize;
482489
ptr::copy_nonoverlapping(x.offset(i), t, rem);
483490
ptr::copy_nonoverlapping(y.offset(i), x.offset(i), rem);

0 commit comments

Comments
 (0)