@@ -115,17 +115,27 @@ _Py_popcount32(uint32_t x)
115
115
const uint32_t M2 = 0x33333333 ;
116
116
// Binary: 0000 1111 0000 1111 ...
117
117
const uint32_t M4 = 0x0F0F0F0F ;
118
- // 256**4 + 256**3 + 256**2 + 256**1
119
- const uint32_t SUM = 0x01010101 ;
120
118
121
119
// Put count of each 2 bits into those 2 bits
122
120
x = x - ((x >> 1 ) & M1 );
123
121
// Put count of each 4 bits into those 4 bits
124
122
x = (x & M2 ) + ((x >> 2 ) & M2 );
125
123
// Put count of each 8 bits into those 8 bits
126
124
x = (x + (x >> 4 )) & M4 ;
127
- // Sum of the 4 byte counts
128
- return (x * SUM ) >> 24 ;
125
+ // Sum of the 4 byte counts.
126
+ // Take care when considering changes to the next line. Portability and
127
+ // correctness are delicate here, thanks to C's "integer promotions" (C99
128
+ // §6.3.1.1p2). On machines where the `int` type has width greater than 32
129
+ // bits, `x` will be promoted to an `int`, and following C's "usual
130
+ // arithmetic conversions" (C99 §6.3.1.8), the multiplication will be
131
+ // performed as a multiplication of two `unsigned int` operands. In this
132
+ // case it's critical that we cast back to `uint32_t` in order to keep only
133
+ // the least significant 32 bits. On machines where the `int` type has
134
+ // width no greater than 32, the multiplication is of two 32-bit unsigned
135
+ // integer types, and the (uint32_t) cast is a no-op. In both cases, we
136
+ // avoid the risk of undefined behaviour due to overflow of a
137
+ // multiplication of signed integer types.
138
+ return (uint32_t )(x * 0x01010101U ) >> 24 ;
129
139
#endif
130
140
}
131
141
0 commit comments