@@ -84,8 +84,11 @@ Tensor& opt_div_out(
84
84
tensor->const_data_ptr<CTYPE>(),
85
85
out.numel());
86
86
} else {
87
+ Vec inv_scalar_casted_vec(CTYPE(1) / scalar_casted);
87
88
executorch::vec::map<CTYPE>(
88
- [scalar_casted](Vec x) { return x / Vec(scalar_casted); },
89
+ [inv_scalar_casted_vec](Vec x) {
90
+ return x * inv_scalar_casted_vec;
91
+ },
89
92
out.mutable_data_ptr<CTYPE>(),
90
93
tensor->const_data_ptr<CTYPE>(),
91
94
out.numel());
@@ -220,8 +223,9 @@ Tensor& opt_div_scalar_out(
220
223
CTYPE b_casted = static_cast<CTYPE>(b_val);
221
224
222
225
using Vec = executorch::vec::Vectorized<CTYPE>;
226
+ Vec inv_b_casted_vec(CTYPE(1) / b_casted);
223
227
executorch::vec::map<CTYPE>(
224
- [b_casted](Vec x) { return x / Vec(b_casted); },
228
+ [inv_b_casted_vec](Vec x) { return x * inv_b_casted_vec; },
225
229
out.mutable_data_ptr<CTYPE>(),
226
230
a.const_data_ptr<CTYPE>(),
227
231
out.numel());
@@ -239,14 +243,16 @@ Tensor& opt_div_scalar_out(
239
243
CTYPE_B b_val;
240
244
ET_EXTRACT_SCALAR(b, b_val);
241
245
CTYPE_IN b_casted = static_cast<CTYPE_IN>(b_val);
246
+ CTYPE_IN inv_b_casted = CTYPE_IN(1) / b_casted;
242
247
243
248
const size_t n = a.numel();
244
249
const CTYPE_A* a_data = a.const_data_ptr<CTYPE_A>();
245
250
CTYPE_OUT* out_data =
246
251
out.mutable_data_ptr<CTYPE_OUT>();
247
252
for (auto i = 0; i < n; ++i) {
248
253
out_data[i] = static_cast<CTYPE_OUT>(
249
- static_cast<CTYPE_IN>(a_data[i]) / b_casted);
254
+ static_cast<CTYPE_IN>(a_data[i]) *
255
+ inv_b_casted);
250
256
}
251
257
});
252
258
});
0 commit comments