Skip to content

Commit a5b3d3a

Browse files
authored
[libc++] Fix {std, ranges}::copy for vector<bool> with small storage types (llvm#131545)
The current implementation of `{std, ranges}::copy` fails to copy `vector<bool>` correctly when the underlying storage type (`__storage_type`) is smaller than `int`, such as `unsigned char`, `unsigned short`, `uint8_t` and `uint16_t`. The root cause is that the unsigned small storage type undergoes integer promotion to (signed) `int`, which is then left and right shifted, leading to UB (before C++20) and sign-bit extension (since C++20) respectively. As a result, the underlying bit mask evaluations become incorrect, causing erroneous copying behavior. This patch resolves the issue by correcting the internal bitwise operations, ensuring that `{std, ranges}::copy` operates correctly for `vector<bool>` with any custom (unsigned) storage types. Fixes llvm#131692.
1 parent 480202f commit a5b3d3a

File tree

4 files changed

+308
-10
lines changed

4 files changed

+308
-10
lines changed

libcxx/include/__algorithm/copy.h

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cp, false> _
5353
unsigned __clz = __bits_per_word - __first.__ctz_;
5454
difference_type __dn = std::min(static_cast<difference_type>(__clz), __n);
5555
__n -= __dn;
56-
__storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz - __dn));
56+
__storage_type __m = std::__middle_mask<__storage_type>(__clz - __dn, __first.__ctz_);
5757
__storage_type __b = *__first.__seg_ & __m;
5858
*__result.__seg_ &= ~__m;
5959
*__result.__seg_ |= __b;
@@ -73,7 +73,7 @@ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cp, false> _
7373
// do last word
7474
if (__n > 0) {
7575
__first.__seg_ += __nw;
76-
__storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n);
76+
__storage_type __m = std::__trailing_mask<__storage_type>(__bits_per_word - __n);
7777
__storage_type __b = *__first.__seg_ & __m;
7878
*__result.__seg_ &= ~__m;
7979
*__result.__seg_ |= __b;
@@ -98,11 +98,11 @@ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cp, false> _
9898
unsigned __clz_f = __bits_per_word - __first.__ctz_;
9999
difference_type __dn = std::min(static_cast<difference_type>(__clz_f), __n);
100100
__n -= __dn;
101-
__storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn));
101+
__storage_type __m = std::__middle_mask<__storage_type>(__clz_f - __dn, __first.__ctz_);
102102
__storage_type __b = *__first.__seg_ & __m;
103103
unsigned __clz_r = __bits_per_word - __result.__ctz_;
104104
__storage_type __ddn = std::min<__storage_type>(__dn, __clz_r);
105-
__m = (~__storage_type(0) << __result.__ctz_) & (~__storage_type(0) >> (__clz_r - __ddn));
105+
__m = std::__middle_mask<__storage_type>(__clz_r - __ddn, __result.__ctz_);
106106
*__result.__seg_ &= ~__m;
107107
if (__result.__ctz_ > __first.__ctz_)
108108
*__result.__seg_ |= __b << (__result.__ctz_ - __first.__ctz_);
@@ -112,7 +112,7 @@ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cp, false> _
112112
__result.__ctz_ = static_cast<unsigned>((__ddn + __result.__ctz_) % __bits_per_word);
113113
__dn -= __ddn;
114114
if (__dn > 0) {
115-
__m = ~__storage_type(0) >> (__bits_per_word - __dn);
115+
__m = std::__trailing_mask<__storage_type>(__bits_per_word - __dn);
116116
*__result.__seg_ &= ~__m;
117117
*__result.__seg_ |= __b >> (__first.__ctz_ + __ddn);
118118
__result.__ctz_ = static_cast<unsigned>(__dn);
@@ -123,7 +123,7 @@ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cp, false> _
123123
// __first.__ctz_ == 0;
124124
// do middle words
125125
unsigned __clz_r = __bits_per_word - __result.__ctz_;
126-
__storage_type __m = ~__storage_type(0) << __result.__ctz_;
126+
__storage_type __m = std::__leading_mask<__storage_type>(__result.__ctz_);
127127
for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first.__seg_) {
128128
__storage_type __b = *__first.__seg_;
129129
*__result.__seg_ &= ~__m;
@@ -134,17 +134,17 @@ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cp, false> _
134134
}
135135
// do last word
136136
if (__n > 0) {
137-
__m = ~__storage_type(0) >> (__bits_per_word - __n);
137+
__m = std::__trailing_mask<__storage_type>(__bits_per_word - __n);
138138
__storage_type __b = *__first.__seg_ & __m;
139139
__storage_type __dn = std::min(__n, static_cast<difference_type>(__clz_r));
140-
__m = (~__storage_type(0) << __result.__ctz_) & (~__storage_type(0) >> (__clz_r - __dn));
140+
__m = std::__middle_mask<__storage_type>(__clz_r - __dn, __result.__ctz_);
141141
*__result.__seg_ &= ~__m;
142142
*__result.__seg_ |= __b << __result.__ctz_;
143143
__result.__seg_ += (__dn + __result.__ctz_) / __bits_per_word;
144144
__result.__ctz_ = static_cast<unsigned>((__dn + __result.__ctz_) % __bits_per_word);
145145
__n -= __dn;
146146
if (__n > 0) {
147-
__m = ~__storage_type(0) >> (__bits_per_word - __n);
147+
__m = std::__trailing_mask<__storage_type>(__bits_per_word - __n);
148148
*__result.__seg_ &= ~__m;
149149
*__result.__seg_ |= __b >> __dn;
150150
__result.__ctz_ = static_cast<unsigned>(__n);

libcxx/include/__fwd/bit_reference.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ struct __size_difference_type_traits;
2828

2929
template <class _StoragePointer>
3030
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 void
31-
__fill_masked_range(_StoragePointer __word, unsigned __ctz, unsigned __clz, bool __fill_val);
31+
__fill_masked_range(_StoragePointer __word, unsigned __clz, unsigned __ctz, bool __fill_val);
3232

3333
template <class _StorageType>
3434
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _StorageType __trailing_mask(unsigned __clz);

libcxx/test/std/algorithms/alg.modifying.operations/alg.copy/copy.pass.cpp

Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,13 @@
1212
// constexpr OutIter // constexpr after C++17
1313
// copy(InIter first, InIter last, OutIter result);
1414

15+
// XFAIL: FROZEN-CXX03-HEADERS-FIXME
16+
1517
#include <algorithm>
1618
#include <cassert>
1719
#include <vector>
1820

21+
#include "sized_allocator.h"
1922
#include "test_macros.h"
2023
#include "test_iterators.h"
2124
#include "type_algorithms.h"
@@ -109,6 +112,153 @@ TEST_CONSTEXPR_CXX20 bool test() {
109112
assert(test_vector_bool(256));
110113
}
111114

115+
// Validate std::copy with std::vector<bool> iterators and custom storage types.
116+
// Ensure that assigned bits hold the intended values, while unassigned bits stay unchanged.
117+
// Related issue: https://github.com/llvm/llvm-project/issues/131692.
118+
{
119+
//// Tests for std::copy with aligned bits
120+
121+
{ // Test the first (partial) word for uint8_t
122+
using Alloc = sized_allocator<bool, std::uint8_t, std::int8_t>;
123+
std::vector<bool, Alloc> in(8, false, Alloc(1));
124+
std::vector<bool, Alloc> out(8, true, Alloc(1));
125+
std::copy(in.begin() + 1, in.begin() + 2, out.begin() + 1); // out[1] = false
126+
assert(out[1] == false);
127+
for (std::size_t i = 0; i < out.size(); ++i) // Ensure that unassigned bits remain unchanged
128+
if (i != 1)
129+
assert(out[i] == true);
130+
}
131+
{ // Test the last (partial) word for uint8_t
132+
using Alloc = sized_allocator<bool, std::uint8_t, std::int8_t>;
133+
std::vector<bool, Alloc> in(8, false, Alloc(1));
134+
std::vector<bool, Alloc> out(8, true, Alloc(1));
135+
std::copy(in.begin(), in.begin() + 1, out.begin()); // out[0] = false
136+
assert(out[0] == false);
137+
for (std::size_t i = 1; i < out.size(); ++i) // Ensure that unassigned bits remain unchanged
138+
assert(out[i] == true);
139+
}
140+
{ // Test middle (whole) words for uint8_t
141+
using Alloc = sized_allocator<bool, std::uint8_t, std::int8_t>;
142+
std::vector<bool, Alloc> in(32, true, Alloc(1));
143+
for (std::size_t i = 0; i < in.size(); i += 2)
144+
in[i] = false;
145+
std::vector<bool, Alloc> out(32, false, Alloc(1));
146+
std::copy(in.begin() + 4, in.end() - 4, out.begin() + 4);
147+
for (std::size_t i = 4; i < static_cast<std::size_t>(in.size() - 4); ++i)
148+
assert(in[i] == out[i]);
149+
for (std::size_t i = 0; i < 4; ++i)
150+
assert(out[i] == false);
151+
for (std::size_t i = 28; i < out.size(); ++i)
152+
assert(out[i] == false);
153+
}
154+
155+
{ // Test the first (partial) word for uint16_t
156+
using Alloc = sized_allocator<bool, std::uint16_t, std::int16_t>;
157+
std::vector<bool, Alloc> in(16, false, Alloc(1));
158+
std::vector<bool, Alloc> out(16, true, Alloc(1));
159+
std::copy(in.begin() + 1, in.begin() + 3, out.begin() + 1); // out[1..2] = false
160+
assert(out[1] == false);
161+
assert(out[2] == false);
162+
for (std::size_t i = 0; i < out.size(); ++i) // Ensure that unassigned bits remain unchanged
163+
if (i != 1 && i != 2)
164+
assert(out[i] == true);
165+
}
166+
{ // Test the last (partial) word for uint16_t
167+
using Alloc = sized_allocator<bool, std::uint16_t, std::int16_t>;
168+
std::vector<bool, Alloc> in(16, false, Alloc(1));
169+
std::vector<bool, Alloc> out(16, true, Alloc(1));
170+
std::copy(in.begin(), in.begin() + 2, out.begin()); // out[0..1] = false
171+
assert(out[0] == false);
172+
assert(out[1] == false);
173+
for (std::size_t i = 2; i < out.size(); ++i) // Ensure that unassigned bits remain unchanged
174+
assert(out[i] == true);
175+
}
176+
{ // Test middle (whole) words for uint16_t
177+
using Alloc = sized_allocator<bool, std::uint16_t, std::int16_t>;
178+
std::vector<bool, Alloc> in(64, true, Alloc(1));
179+
for (std::size_t i = 0; i < in.size(); i += 2)
180+
in[i] = false;
181+
std::vector<bool, Alloc> out(64, false, Alloc(1));
182+
std::copy(in.begin() + 8, in.end() - 8, out.begin() + 8);
183+
for (std::size_t i = 8; i < static_cast<std::size_t>(in.size() - 8); ++i)
184+
assert(in[i] == out[i]);
185+
for (std::size_t i = 0; i < 8; ++i)
186+
assert(out[i] == false);
187+
for (std::size_t i = static_cast<std::size_t>(out.size() - 8); i < out.size(); ++i)
188+
assert(out[i] == false);
189+
}
190+
191+
//// Tests for std::copy with unaligned bits
192+
193+
{ // Test the first (partial) word for uint8_t
194+
using Alloc = sized_allocator<bool, std::uint8_t, std::int8_t>;
195+
std::vector<bool, Alloc> in(8, false, Alloc(1));
196+
std::vector<bool, Alloc> out(8, true, Alloc(1));
197+
std::copy(in.begin() + 7, in.end(), out.begin()); // out[0] = false
198+
assert(out[0] == false);
199+
for (std::size_t i = 1; i < out.size(); ++i) // Ensure that unassigned bits remain unchanged
200+
assert(out[i] == true);
201+
}
202+
{ // Test the last (partial) word for uint8_t
203+
using Alloc = sized_allocator<bool, std::uint8_t, std::int8_t>;
204+
std::vector<bool, Alloc> in(8, false, Alloc(1));
205+
std::vector<bool, Alloc> out(8, true, Alloc(1));
206+
std::copy(in.begin(), in.begin() + 1, out.begin() + 2); // out[2] = false
207+
assert(out[2] == false);
208+
for (std::size_t i = 1; i < out.size(); ++i) // Ensure that unassigned bits remain unchanged
209+
if (i != 2)
210+
assert(out[i] == true);
211+
}
212+
{ // Test middle (whole) words for uint8_t
213+
using Alloc = sized_allocator<bool, std::uint8_t, std::int8_t>;
214+
std::vector<bool, Alloc> in(36, true, Alloc(1));
215+
for (std::size_t i = 0; i < in.size(); i += 2)
216+
in[i] = false;
217+
std::vector<bool, Alloc> out(40, false, Alloc(1));
218+
std::copy(in.begin(), in.end(), out.begin() + 4);
219+
for (std::size_t i = 0; i < in.size(); ++i)
220+
assert(in[i] == out[i + 4]);
221+
for (std::size_t i = 0; i < 4; ++i)
222+
assert(out[i] == false);
223+
}
224+
225+
{ // Test the first (partial) word for uint16_t
226+
using Alloc = sized_allocator<bool, std::uint16_t, std::int16_t>;
227+
std::vector<bool, Alloc> in(16, false, Alloc(1));
228+
std::vector<bool, Alloc> out(16, true, Alloc(1));
229+
std::copy(in.begin() + 14, in.end(), out.begin()); // out[0..1] = false
230+
assert(out[0] == false);
231+
assert(out[1] == false);
232+
for (std::size_t i = 2; i < out.size(); ++i) // Ensure that unassigned bits remain unchanged
233+
assert(out[i] == true);
234+
}
235+
{ // Test the last (partial) word for uint16_t
236+
using Alloc = sized_allocator<bool, std::uint16_t, std::int16_t>;
237+
std::vector<bool, Alloc> in(16, false, Alloc(1));
238+
std::vector<bool, Alloc> out(16, true, Alloc(1));
239+
std::copy(in.begin(), in.begin() + 2, out.begin() + 1); // out[1..2] = false
240+
assert(out[1] == false);
241+
assert(out[2] == false);
242+
for (std::size_t i = 0; i < out.size(); ++i) // Ensure that unassigned bits remain unchanged
243+
if (i != 1 && i != 2)
244+
assert(out[i] == true);
245+
}
246+
{ // Test middle (whole) words for uint16_t
247+
using Alloc = sized_allocator<bool, std::uint16_t, std::int16_t>;
248+
std::vector<bool, Alloc> in(72, true, Alloc(1));
249+
for (std::size_t i = 0; i < in.size(); i += 2)
250+
in[i] = false;
251+
std::vector<bool, Alloc> out(80, false, Alloc(1));
252+
std::copy(in.begin(), in.end(), out.begin() + 4);
253+
for (std::size_t i = 0; i < in.size(); ++i)
254+
assert(in[i] == out[i + 4]);
255+
for (std::size_t i = 0; i < 4; ++i)
256+
assert(out[i] == false);
257+
for (std::size_t i = in.size() + 4; i < out.size(); ++i)
258+
assert(out[i] == false);
259+
}
260+
}
261+
112262
return true;
113263
}
114264

0 commit comments

Comments
 (0)