Skip to content

Commit d2eeed4

Browse files
committed
Implement more simd_reduce_* intrinsics
1 parent 47ff2e0 commit d2eeed4

File tree

2 files changed

+64
-27
lines changed

2 files changed

+64
-27
lines changed

src/intrinsics/mod.rs

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,48 @@ fn simd_pair_for_each_lane<'tcx, M: Module>(
263263
}
264264
}
265265

266+
fn simd_reduce<'tcx, M: Module>(
267+
fx: &mut FunctionCx<'_, 'tcx, M>,
268+
val: CValue<'tcx>,
269+
ret: CPlace<'tcx>,
270+
f: impl Fn(&mut FunctionCx<'_, 'tcx, M>, TyAndLayout<'tcx>, Value, Value) -> Value,
271+
) {
272+
let (lane_layout, lane_count) = lane_type_and_count(fx.tcx, val.layout());
273+
assert_eq!(lane_layout, ret.layout());
274+
275+
let mut res_val = val.value_field(fx, mir::Field::new(0)).load_scalar(fx);
276+
for lane_idx in 1..lane_count {
277+
let lane = val
278+
.value_field(fx, mir::Field::new(lane_idx.into()))
279+
.load_scalar(fx);
280+
res_val = f(fx, lane_layout, res_val, lane);
281+
}
282+
let res = CValue::by_val(res_val, lane_layout);
283+
ret.write_cvalue(fx, res);
284+
}
285+
286+
fn simd_reduce_bool<'tcx, M: Module>(
287+
fx: &mut FunctionCx<'_, 'tcx, M>,
288+
val: CValue<'tcx>,
289+
ret: CPlace<'tcx>,
290+
f: impl Fn(&mut FunctionCx<'_, 'tcx, M>, Value, Value) -> Value,
291+
) {
292+
let (_lane_layout, lane_count) = lane_type_and_count(fx.tcx, val.layout());
293+
assert!(ret.layout().ty.is_bool());
294+
295+
let res_val = val.value_field(fx, mir::Field::new(0)).load_scalar(fx);
296+
let mut res_val = fx.bcx.ins().band_imm(res_val, 1); // mask to boolean
297+
for lane_idx in 1..lane_count {
298+
let lane = val
299+
.value_field(fx, mir::Field::new(lane_idx.into()))
300+
.load_scalar(fx);
301+
let lane = fx.bcx.ins().band_imm(lane, 1); // mask to boolean
302+
res_val = f(fx, res_val, lane);
303+
}
304+
let res = CValue::by_val(res_val, ret.layout());
305+
ret.write_cvalue(fx, res);
306+
}
307+
266308
fn bool_to_zero_or_max_uint<'tcx>(
267309
fx: &mut FunctionCx<'_, 'tcx, impl Module>,
268310
layout: TyAndLayout<'tcx>,

src/intrinsics/simd.rs

Lines changed: 22 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -233,45 +233,40 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
233233

234234
simd_reduce_add_ordered | simd_reduce_add_unordered, (c v) {
235235
validate_simd_type!(fx, intrinsic, span, v.layout().ty);
236-
let (lane_layout, lane_count) = lane_type_and_count(fx.tcx, v.layout());
237-
assert_eq!(lane_layout.ty, ret.layout().ty);
238-
239-
let mut res_val = v.value_field(fx, mir::Field::new(0)).load_scalar(fx);
240-
for lane_idx in 1..lane_count {
241-
let lane = v.value_field(fx, mir::Field::new(lane_idx.into())).load_scalar(fx);
242-
res_val = if lane_layout.ty.is_floating_point() {
243-
fx.bcx.ins().fadd(res_val, lane)
236+
simd_reduce(fx, v, ret, |fx, lane_layout, a, b| {
237+
if lane_layout.ty.is_floating_point() {
238+
fx.bcx.ins().fadd(a, b)
244239
} else {
245-
fx.bcx.ins().iadd(res_val, lane)
246-
};
247-
}
248-
let res = CValue::by_val(res_val, lane_layout);
249-
ret.write_cvalue(fx, res);
240+
fx.bcx.ins().iadd(a, b)
241+
}
242+
});
250243
};
251244

252245
simd_reduce_mul_ordered | simd_reduce_mul_unordered, (c v) {
253246
validate_simd_type!(fx, intrinsic, span, v.layout().ty);
254-
let (lane_layout, lane_count) = lane_type_and_count(fx.tcx, v.layout());
255-
assert_eq!(lane_layout.ty, ret.layout().ty);
256-
257-
let mut res_val = v.value_field(fx, mir::Field::new(0)).load_scalar(fx);
258-
for lane_idx in 1..lane_count {
259-
let lane = v.value_field(fx, mir::Field::new(lane_idx.into())).load_scalar(fx);
260-
res_val = if lane_layout.ty.is_floating_point() {
261-
fx.bcx.ins().fmul(res_val, lane)
247+
simd_reduce(fx, v, ret, |fx, lane_layout, a, b| {
248+
if lane_layout.ty.is_floating_point() {
249+
fx.bcx.ins().fmul(a, b)
262250
} else {
263-
fx.bcx.ins().imul(res_val, lane)
264-
};
265-
}
266-
let res = CValue::by_val(res_val, lane_layout);
267-
ret.write_cvalue(fx, res);
251+
fx.bcx.ins().imul(a, b)
252+
}
253+
});
254+
};
255+
256+
simd_reduce_all, (c v) {
257+
validate_simd_type!(fx, intrinsic, span, v.layout().ty);
258+
simd_reduce_bool(fx, v, ret, |fx, a, b| fx.bcx.ins().band(a, b));
259+
};
260+
261+
simd_reduce_any, (c v) {
262+
validate_simd_type!(fx, intrinsic, span, v.layout().ty);
263+
simd_reduce_bool(fx, v, ret, |fx, a, b| fx.bcx.ins().bor(a, b));
268264
};
269265

270266
// simd_fabs
271267
// simd_saturating_add
272268
// simd_bitmask
273269
// simd_select
274-
// simd_reduce_{add,mul}_{,un}ordered
275270
// simd_rem
276271
}
277272
}

0 commit comments

Comments
 (0)