|
5 | 5 | #include <algorithm> // for max
|
6 | 6 | #include <array> // for array
|
7 | 7 | #include <cstdio> // for snprintf
|
| 8 | +#include <cstring> // for memcpy |
8 | 9 | #include <exception> // for exception
|
9 | 10 | #include <initializer_list> // for initializer_list
|
10 | 11 | #include <iterator> // for forward_iterator_tag, random_ac...
|
@@ -147,6 +148,8 @@ class r_vector {
|
147 | 148 | /// Implemented in specialization
|
148 | 149 | static underlying_type* get_p(bool is_altrep, SEXP data);
|
149 | 150 | /// Implemented in specialization
|
| 151 | + static underlying_type const* get_const_p(bool is_altrep, SEXP data); |
| 152 | + /// Implemented in specialization |
150 | 153 | static void get_region(SEXP x, R_xlen_t i, R_xlen_t n, underlying_type* buf);
|
151 | 154 | /// Implemented in specialization
|
152 | 155 | static SEXPTYPE get_sexptype();
|
@@ -313,7 +316,13 @@ class r_vector : public cpp11::r_vector<T> {
|
313 | 316 | /// Implemented in specialization
|
314 | 317 | static void set_elt(SEXP x, R_xlen_t i, underlying_type value);
|
315 | 318 |
|
| 319 | + static SEXP reserve_data(SEXP x, bool is_altrep, R_xlen_t size); |
| 320 | + static SEXP resize_data(SEXP x, bool is_altrep, R_xlen_t size); |
| 321 | + static SEXP resize_names(SEXP x, R_xlen_t size); |
| 322 | + |
| 323 | + using cpp11::r_vector<T>::get_elt; |
316 | 324 | using cpp11::r_vector<T>::get_p;
|
| 325 | + using cpp11::r_vector<T>::get_const_p; |
317 | 326 | using cpp11::r_vector<T>::get_sexptype;
|
318 | 327 | };
|
319 | 328 | } // namespace writable
|
@@ -741,8 +750,25 @@ inline r_vector<T>::r_vector(SEXP&& data, bool is_altrep)
|
741 | 750 | : cpp11::r_vector<T>(data, is_altrep), capacity_(length_) {}
|
742 | 751 |
|
743 | 752 | template <typename T>
|
744 |
| -inline r_vector<T>::r_vector(const r_vector& rhs) |
745 |
| - : cpp11::r_vector<T>(safe[Rf_shallow_duplicate](rhs)), capacity_(rhs.capacity_) {} |
| 753 | +inline r_vector<T>::r_vector(const r_vector& rhs) { |
| 754 | + // We don't want to just pass through to the read-only constructor because we'd |
| 755 | + // have to convert to `SEXP` first, which could truncate, and then we'd still have |
| 756 | + // to shallow duplicate after that to ensure we have a duplicate, which can result in |
| 757 | + // too many copies (#369). |
| 758 | + // |
| 759 | + // Instead we take control of setting all fields to try and only duplicate 1 time. |
| 760 | + // We try and reclaim unused capacity during the duplication by only reserving up to |
| 761 | + // the `rhs.length_`. This is nice because if the user returns this object, the |
| 762 | + // truncation has already been done and they don't have to pay for another allocation. |
| 763 | + // Importantly, `reserve_data()` always duplicates even if there wasn't extra capacity, |
| 764 | + // which ensures we have our own copy. |
| 765 | + data_ = reserve_data(rhs.data_, rhs.is_altrep_, rhs.length_); |
| 766 | + protect_ = detail::store::insert(data_); |
| 767 | + is_altrep_ = ALTREP(data_); |
| 768 | + data_p_ = get_p(is_altrep_, data_); |
| 769 | + length_ = rhs.length_; |
| 770 | + capacity_ = rhs.length_; |
| 771 | +} |
746 | 772 |
|
747 | 773 | template <typename T>
|
748 | 774 | inline r_vector<T>::r_vector(r_vector&& rhs) {
|
@@ -987,7 +1013,7 @@ inline void r_vector<T>::reserve(R_xlen_t new_capacity) {
|
987 | 1013 | SEXP old_protect = protect_;
|
988 | 1014 |
|
989 | 1015 | data_ = (data_ == R_NilValue) ? safe[Rf_allocVector](get_sexptype(), new_capacity)
|
990 |
| - : safe[Rf_xlengthgets](data_, new_capacity); |
| 1016 | + : reserve_data(data_, is_altrep_, new_capacity); |
991 | 1017 | protect_ = detail::store::insert(data_);
|
992 | 1018 | is_altrep_ = ALTREP(data_);
|
993 | 1019 | data_p_ = get_p(is_altrep_, data_);
|
@@ -1188,6 +1214,83 @@ inline typename r_vector<T>::iterator r_vector<T>::iterator::operator+(R_xlen_t
|
1188 | 1214 | return it;
|
1189 | 1215 | }
|
1190 | 1216 |
|
| 1217 | +// Compared to `Rf_xlengthgets()`: |
| 1218 | +// - This always allocates, even if it is the same size, which is important when we use |
| 1219 | +// it in a constructor and need to ensure that it duplicates on the way in. |
| 1220 | +// - This copies over attributes with `Rf_copyMostAttrib()`, which is important when we |
| 1221 | +// use it in constructors and when we truncate right before returning from the `SEXP` |
| 1222 | +// operator. |
| 1223 | +// - This is more friendly to ALTREP `x`. |
| 1224 | +template <typename T> |
| 1225 | +inline SEXP r_vector<T>::reserve_data(SEXP x, bool is_altrep, R_xlen_t size) { |
| 1226 | + // Resize core data |
| 1227 | + SEXP out = PROTECT(resize_data(x, is_altrep, size)); |
| 1228 | + |
| 1229 | + // Resize names, if required |
| 1230 | + SEXP names = Rf_getAttrib(x, R_NamesSymbol); |
| 1231 | + if (names != R_NilValue) { |
| 1232 | + names = resize_names(names, size); |
| 1233 | + Rf_setAttrib(out, R_NamesSymbol, names); |
| 1234 | + } |
| 1235 | + |
| 1236 | + // Copy over "most" attributes. |
| 1237 | + // Does not copy over names, dim, or dim names. |
| 1238 | + // Names are handled already. Dim and dim names should not be applicable, |
| 1239 | + // as this is a vector. |
| 1240 | + // Does not look like it would ever error in our use cases, so no `safe[]`. |
| 1241 | + Rf_copyMostAttrib(x, out); |
| 1242 | + |
| 1243 | + UNPROTECT(1); |
| 1244 | + return out; |
| 1245 | +} |
| 1246 | + |
| 1247 | +template <typename T> |
| 1248 | +inline SEXP r_vector<T>::resize_data(SEXP x, bool is_altrep, R_xlen_t size) { |
| 1249 | + underlying_type const* v_x = get_const_p(is_altrep, x); |
| 1250 | + |
| 1251 | + SEXP out = PROTECT(safe[Rf_allocVector](get_sexptype(), size)); |
| 1252 | + underlying_type* v_out = get_p(ALTREP(out), out); |
| 1253 | + |
| 1254 | + const R_xlen_t x_size = Rf_xlength(x); |
| 1255 | + const R_xlen_t copy_size = (x_size > size) ? size : x_size; |
| 1256 | + |
| 1257 | + // Copy over data from `x` up to `copy_size` (we could be truncating so don't blindly |
| 1258 | + // copy everything from `x`) |
| 1259 | + if (v_x != nullptr && v_out != nullptr) { |
| 1260 | + std::memcpy(v_out, v_x, copy_size * sizeof(underlying_type)); |
| 1261 | + } else { |
| 1262 | + // Handles ALTREP `x` with no const pointer, VECSXP, STRSXP |
| 1263 | + for (R_xlen_t i = 0; i < copy_size; ++i) { |
| 1264 | + set_elt(out, i, get_elt(x, i)); |
| 1265 | + } |
| 1266 | + } |
| 1267 | + |
| 1268 | + UNPROTECT(1); |
| 1269 | + return out; |
| 1270 | +} |
| 1271 | + |
| 1272 | +template <typename T> |
| 1273 | +inline SEXP r_vector<T>::resize_names(SEXP x, R_xlen_t size) { |
| 1274 | + const SEXP* v_x = STRING_PTR_RO(x); |
| 1275 | + |
| 1276 | + SEXP out = PROTECT(safe[Rf_allocVector](STRSXP, size)); |
| 1277 | + |
| 1278 | + const R_xlen_t x_size = Rf_xlength(x); |
| 1279 | + const R_xlen_t copy_size = (x_size > size) ? size : x_size; |
| 1280 | + |
| 1281 | + for (R_xlen_t i = 0; i < copy_size; ++i) { |
| 1282 | + SET_STRING_ELT(out, i, v_x[i]); |
| 1283 | + } |
| 1284 | + |
| 1285 | + // Ensure remaining names are initialized to `""` |
| 1286 | + for (R_xlen_t i = copy_size; i < size; ++i) { |
| 1287 | + SET_STRING_ELT(out, i, R_BlankString); |
| 1288 | + } |
| 1289 | + |
| 1290 | + UNPROTECT(1); |
| 1291 | + return out; |
| 1292 | +} |
| 1293 | + |
1191 | 1294 | } // namespace writable
|
1192 | 1295 |
|
1193 | 1296 | // TODO: is there a better condition we could use, e.g. assert something true
|
|
0 commit comments