@@ -151,9 +151,7 @@ _DEFAULT_FN_ATTRS static __inline__ void __gpu_sync_lane(uint64_t __lane_mask) {
151
151
// Forwards __x and __idx to __nvvm_shfl_sync_idx_i32 and returns its result.
// Only the low 32 bits of __lane_mask are passed on as the mask.
_DEFAULT_FN_ATTRS static __inline__ uint32_t
152
152
__gpu_shuffle_idx_u32 (uint64_t __lane_mask , uint32_t __idx , uint32_t __x ) {
153
153
uint32_t __mask = (uint32_t )__lane_mask ;
154
// Removed by this change: the result used to be ANDed with -__bitmask, so the
// function returned 0 whenever bit __idx of __mask was clear.
// NOTE(review): the removed shift `__mask >> __idx` is undefined for
// __idx >= 32, since __mask is 32 bits wide.
- uint32_t __bitmask = (__mask >> __idx ) & 1u ;
155
- return - __bitmask &
156
- __nvvm_shfl_sync_idx_i32 (__mask , __x , __idx , __gpu_num_lanes () - 1u );
154
// Added by this change: the intrinsic's result is returned unmodified.
// NOTE(review): the last argument is presumably the clamp/width operand of the
// shuffle; confirm against the intrinsic's documentation.
+ return __nvvm_shfl_sync_idx_i32 (__mask , __x , __idx , __gpu_num_lanes () - 1u );
157
155
}
158
156
159
157
// Shuffles the lanes inside the warp according to the given index.
@@ -162,10 +160,9 @@ __gpu_shuffle_idx_u64(uint64_t __lane_mask, uint32_t __idx, uint64_t __x) {
162
160
uint32_t __hi = (uint32_t )(__x >> 32ull );
163
161
uint32_t __lo = (uint32_t )(__x & 0xFFFFFFFF );
164
162
uint32_t __mask = (uint32_t )__lane_mask ;
165
- uint64_t __bitmask = (__mask >> __idx ) & 1u ;
166
- return - __bitmask & ((uint64_t )__nvvm_shfl_sync_idx_i32 (
167
- __mask , __hi , __idx , __gpu_num_lanes () - 1u )
168
- << 32ull ) |
163
+ return ((uint64_t )__nvvm_shfl_sync_idx_i32 (__mask , __hi , __idx ,
164
+ __gpu_num_lanes () - 1u )
165
+ << 32ull ) |
169
166
((uint64_t )__nvvm_shfl_sync_idx_i32 (__mask , __lo , __idx ,
170
167
__gpu_num_lanes () - 1u ));
171
168
}
0 commit comments