Skip to content

Commit 07e15a9

Browse files
swolchok authored and facebook-github-bot committed
Precompute multiplicative inverse when possible in op_div (#5209)
Summary:
Pull Request resolved: #5209
Division is generally slower than multiplication in hardware.
ghstack-source-id: 242226478
Reviewed By: kimishpatel
Differential Revision: D62412539
fbshipit-source-id: 78c659d84579c42da3dc44a8d8c88e31ea369430
1 parent 9845019 commit 07e15a9

File tree

1 file changed

+9
-3
lines changed

1 file changed

+9
-3
lines changed

kernels/optimized/cpu/op_div.cpp

Lines changed: 9 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -84,8 +84,11 @@ Tensor& opt_div_out(
8484
tensor->const_data_ptr<CTYPE>(),
8585
out.numel());
8686
} else {
87+
Vec inv_scalar_casted_vec(CTYPE(1) / scalar_casted);
8788
executorch::vec::map<CTYPE>(
88-
[scalar_casted](Vec x) { return x / Vec(scalar_casted); },
89+
[inv_scalar_casted_vec](Vec x) {
90+
return x * inv_scalar_casted_vec;
91+
},
8992
out.mutable_data_ptr<CTYPE>(),
9093
tensor->const_data_ptr<CTYPE>(),
9194
out.numel());
@@ -220,8 +223,9 @@ Tensor& opt_div_scalar_out(
220223
CTYPE b_casted = static_cast<CTYPE>(b_val);
221224

222225
using Vec = executorch::vec::Vectorized<CTYPE>;
226+
Vec inv_b_casted_vec(CTYPE(1) / b_casted);
223227
executorch::vec::map<CTYPE>(
224-
[b_casted](Vec x) { return x / Vec(b_casted); },
228+
[inv_b_casted_vec](Vec x) { return x * inv_b_casted_vec; },
225229
out.mutable_data_ptr<CTYPE>(),
226230
a.const_data_ptr<CTYPE>(),
227231
out.numel());
@@ -239,14 +243,16 @@ Tensor& opt_div_scalar_out(
239243
CTYPE_B b_val;
240244
ET_EXTRACT_SCALAR(b, b_val);
241245
CTYPE_IN b_casted = static_cast<CTYPE_IN>(b_val);
246+
CTYPE_IN inv_b_casted = CTYPE_IN(1) / b_casted;
242247

243248
const size_t n = a.numel();
244249
const CTYPE_A* a_data = a.const_data_ptr<CTYPE_A>();
245250
CTYPE_OUT* out_data =
246251
out.mutable_data_ptr<CTYPE_OUT>();
247252
for (auto i = 0; i < n; ++i) {
248253
out_data[i] = static_cast<CTYPE_OUT>(
249-
static_cast<CTYPE_IN>(a_data[i]) / b_casted);
254+
static_cast<CTYPE_IN>(a_data[i]) *
255+
inv_b_casted);
250256
}
251257
});
252258
});

0 commit comments

Comments
 (0)