Skip to content

Commit 406034d

Browse files
mateuszchudyk authored and igcbot committed
Improve rounding number to the next power of 2.
Improve rounding number to the next power of 2.
1 parent 452ea7b commit 406034d

File tree

2 files changed

+26
-7
lines changed

2 files changed

+26
-7
lines changed

visa/VisaToG4/TranslateSendLdStLsc.cpp

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -225,15 +225,9 @@ static int alignUp(int a, int n) { return n + a - 1 - ((n + a - 1) % a); }
225225
static int lscBlock2dComputeDataRegs(LSC_OP op,
226226
LSC_DATA_SHAPE_BLOCK2D dataShape2d,
227227
int BYTES_PER_REG, int dataSizeBits) {
228-
auto roundUpToPowerOf2 = [](int n) {
229-
while (n & (n - 1))
230-
n++;
231-
return n;
232-
};
233-
234228
bool transpose = dataShape2d.order == LSC_DATA_ORDER_TRANSPOSE;
235229
int grfRowPitchElems =
236-
roundUpToPowerOf2(!transpose ? dataShape2d.width : dataShape2d.height);
230+
RoundUpToPowerOf2(!transpose ? dataShape2d.width : dataShape2d.height);
237231
int blockRows = !transpose ? dataShape2d.height : dataShape2d.width;
238232
int elemsPerGrf = 8 * BYTES_PER_REG / dataSizeBits;
239233
// alignUp needed for padding between blocks; each block pads out to

visa/common.h

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,4 +184,29 @@ constexpr Type AlignUp(const Type value, const size_t alignment) {
184184
return common - (common % alignment);
185185
}
186186

187+
//
// Rounds 'value' up to a power of two.
//
// The function returns:
// - 0 if 'value' = 0,
// - 0 if 'value' > 2^31 (the mathematically correct result, 2^32, does not
//   fit in uint32_t, so the unsigned arithmetic wraps around to 0),
// - otherwise the lowest power of two greater than or equal to 'value'
//   ('value' itself when it already is a power of two).
//
// Note: a negative signed argument is converted to a uint32_t above 2^31 and
// therefore also yields 0.
//
// The main idea of the algorithm is to set all lower bits for each set bit of
// 'value - 1' and then add one.
//
constexpr static uint32_t RoundUpToPowerOf2(uint32_t value) {
  // Without decreasing, the result would be:
  // - 1 for 'value' = 0
  // - the next power of two if 'value' is a power of two.
  // For 'value' = 0 the decrement wraps to 0xFFFFFFFF (well defined for
  // unsigned types) and the final increment wraps back to 0.
  value--;

  // Each step doubles the length of every run of 1s (growing to the right),
  // unless the run already reaches bit 0.
  value |= value >> 1;  // runs have length >= 2 or end at bit 0
  value |= value >> 2;  // runs have length >= 4 or end at bit 0
  value |= value >> 4;  // runs have length >= 8 or end at bit 0
  value |= value >> 8;  // runs have length >= 16 or end at bit 0
  value |= value >> 16; // runs have length >= 32 or end at bit 0

  // Every bit below the highest set bit is now 1, so adding one carries into
  // the next bit and produces a power of two (or wraps to 0 on overflow).
  value++;

  return value;
}
211+
187212
#endif //_COMMON_H_

0 commit comments

Comments
 (0)