@@ -453,22 +453,26 @@ pub fn swap<T>(x: &mut T, y: &mut T) {
453
453
// #[repr(simd)], even if we don't actually use this struct directly.
454
454
#[ repr( simd) ]
455
455
struct Block ( u64 , u64 , u64 , u64 ) ;
456
- let block_size = size_of :: < Block > ( ) ;
456
+ struct UnalignedBlock ( u64 , u64 , u64 , u64 ) ;
457
457
458
- // Create some uninitialized memory as scratch space
459
- let mut t: Block = uninitialized ( ) ;
458
+ let block_size = size_of :: < Block > ( ) ;
460
459
461
- // Get raw pointers to the bytes of x, y & t for easier manipulation
460
+ // Get raw pointers to the bytes of x & y for easier manipulation
462
461
let x = x as * mut T as * mut u8 ;
463
462
let y = y as * mut T as * mut u8 ;
464
- let t = & mut t as * mut _ as * mut u8 ;
465
463
466
464
// Loop through x & y, copying them `Block` at a time
467
465
// The optimizer should unroll the loop fully for most types
468
466
// N.B. We can't use a for loop as the `range` impl calls `mem::swap` recursively
469
467
let len = size_of :: < T > ( ) as isize ;
470
468
let mut i = 0 ;
471
469
while i + block_size as isize <= len {
470
+ // Create some uninitialized memory as scratch space
471
+ // Moving the declaration of `t` here avoids aligning the stack when
472
+ // this loop is unused
473
+ let mut t: Block = uninitialized ( ) ;
474
+ let t = & mut t as * mut _ as * mut u8 ;
475
+
472
476
// Swap a block of bytes of x & y, using t as a temporary buffer
473
477
// This should be optimized into efficient SIMD operations where available
474
478
ptr:: copy_nonoverlapping ( x. offset ( i) , t, block_size) ;
@@ -478,6 +482,9 @@ pub fn swap<T>(x: &mut T, y: &mut T) {
478
482
}
479
483
if i < len {
480
484
// Swap any remaining bytes
485
+ let mut t: UnalignedBlock = uninitialized ( ) ;
486
+ let t = & mut t as * mut _ as * mut u8 ;
487
+
481
488
let rem = ( len - i) as usize ;
482
489
ptr:: copy_nonoverlapping ( x. offset ( i) , t, rem) ;
483
490
ptr:: copy_nonoverlapping ( y. offset ( i) , x. offset ( i) , rem) ;
0 commit comments