Skip to content

Commit 0412dea

Browse files
swolchok authored and facebook-github-bot committed
Manual LICM for numel() in quantize ops (#3785)
Summary: Pull Request resolved: #3785 Profiling showed that numel() is not getting inlined, which was preventing optimization. ghstack-source-id: 228354913 Reviewed By: larryliu0820 Differential Revision: D57988068 fbshipit-source-id: e077721c1ec87c0a3969215bc875e7a633af48ea
1 parent d1d2e7a commit 0412dea

File tree

2 files changed

+4
-2
lines changed

2 files changed

+4
-2
lines changed

kernels/quantized/cpu/op_dequantize.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,8 @@ Tensor& dequantize_per_tensor_out(
9797
* get inlined without LTO, particularly in ATen mode. */ \
9898
auto* out_data_ptr = out.mutable_data_ptr<OUT_CTYPE>(); \
9999
const auto* input_data_ptr = input.const_data_ptr<IN_CTYPE>(); \
100-
for (size_t i = 0; i < input.numel(); i++) { \
100+
const auto input_numel = input.numel(); \
101+
for (size_t i = 0; i < input_numel; i++) { \
101102
out_data_ptr[i] = static_cast<OUT_CTYPE>( \
102103
(input_data_ptr[i] - static_cast<int32_t>(zero_point)) * \
103104
static_cast<float>(scale)); \

kernels/quantized/cpu/op_quantize.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,8 @@ Tensor& quantize_per_tensor_out(
124124
* get inlined without LTO, particularly in ATen mode. */ \
125125
auto* out_data_ptr = out.mutable_data_ptr<OUT_CTYPE>(); \
126126
const auto* input_data_ptr = input.const_data_ptr<IN_CTYPE>(); \
127-
for (size_t i = 0; i < input.numel(); i++) { \
127+
const auto input_numel = input.numel(); \
128+
for (size_t i = 0; i < input_numel; i++) { \
128129
IN_CTYPE value = input_data_ptr[i]; \
129130
out_data_ptr[i] = quantize_val<OUT_CTYPE, IN_CTYPE>( \
130131
scale, zero_point, value, quant_min, quant_max); \

0 commit comments

Comments (0)