Skip to content

Commit 401b3eb

Browse files
committed
Take control over resizing to enforce required invariants
1 parent 8214ddc commit 401b3eb

File tree

7 files changed

+153
-3
lines changed

7 files changed

+153
-3
lines changed

inst/include/cpp11/doubles.hpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,12 @@ inline typename r_vector<double>::underlying_type* r_vector<double>::get_p(bool
3838
}
3939
}
4040

41+
template <>
42+
inline typename r_vector<double>::underlying_type const* r_vector<double>::get_const_p(
43+
bool is_altrep, SEXP data) {
44+
return REAL_OR_NULL(data);
45+
}
46+
4147
template <>
4248
inline void r_vector<double>::get_region(SEXP x, R_xlen_t i, R_xlen_t n,
4349
typename r_vector::underlying_type* buf) {

inst/include/cpp11/integers.hpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,12 @@ inline typename r_vector<int>::underlying_type* r_vector<int>::get_p(bool is_alt
3939
}
4040
}
4141

42+
template <>
43+
inline typename r_vector<int>::underlying_type const* r_vector<int>::get_const_p(
44+
bool is_altrep, SEXP data) {
45+
return INTEGER_OR_NULL(data);
46+
}
47+
4248
template <>
4349
inline void r_vector<int>::get_region(SEXP x, R_xlen_t i, R_xlen_t n,
4450
typename r_vector::underlying_type* buf) {

inst/include/cpp11/list.hpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,18 @@ inline typename r_vector<SEXP>::underlying_type* r_vector<SEXP>::get_p(bool, SEX
3131
return nullptr;
3232
}
3333

34+
template <>
35+
inline typename r_vector<SEXP>::underlying_type const* r_vector<SEXP>::get_const_p(
36+
bool is_altrep, SEXP data) {
37+
// No `VECTOR_PTR_OR_NULL()`
38+
if (is_altrep) {
39+
return nullptr;
40+
} else {
41+
// TODO: Use `VECTOR_PTR_RO()` conditionally once R 4.5.0 is officially released
42+
return static_cast<SEXP const*>(DATAPTR_RO(data));
43+
}
44+
}
45+
3446
/// Specialization for lists, where `x["oob"]` returns `R_NilValue`, like at the R level
3547
template <>
3648
inline SEXP r_vector<SEXP>::get_oob() {

inst/include/cpp11/logicals.hpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,12 @@ inline typename r_vector<r_bool>::underlying_type* r_vector<r_bool>::get_p(bool
3838
}
3939
}
4040

41+
template <>
42+
inline typename r_vector<r_bool>::underlying_type const* r_vector<r_bool>::get_const_p(
43+
bool is_altrep, SEXP data) {
44+
return LOGICAL_OR_NULL(data);
45+
}
46+
4147
template <>
4248
inline void r_vector<r_bool>::get_region(SEXP x, R_xlen_t i, R_xlen_t n,
4349
typename r_vector::underlying_type* buf) {

inst/include/cpp11/r_vector.hpp

Lines changed: 106 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#include <algorithm> // for max
66
#include <array> // for array
77
#include <cstdio> // for snprintf
8+
#include <cstring> // for memcpy
89
#include <exception> // for exception
910
#include <initializer_list> // for initializer_list
1011
#include <iterator> // for forward_iterator_tag, random_ac...
@@ -147,6 +148,8 @@ class r_vector {
147148
/// Implemented in specialization
148149
static underlying_type* get_p(bool is_altrep, SEXP data);
149150
/// Implemented in specialization
151+
static underlying_type const* get_const_p(bool is_altrep, SEXP data);
152+
/// Implemented in specialization
150153
static void get_region(SEXP x, R_xlen_t i, R_xlen_t n, underlying_type* buf);
151154
/// Implemented in specialization
152155
static SEXPTYPE get_sexptype();
@@ -313,7 +316,13 @@ class r_vector : public cpp11::r_vector<T> {
313316
/// Implemented in specialization
314317
static void set_elt(SEXP x, R_xlen_t i, underlying_type value);
315318

319+
static SEXP reserve_data(SEXP x, bool is_altrep, R_xlen_t size);
320+
static SEXP resize_data(SEXP x, bool is_altrep, R_xlen_t size);
321+
static SEXP resize_names(SEXP x, R_xlen_t size);
322+
323+
using cpp11::r_vector<T>::get_elt;
316324
using cpp11::r_vector<T>::get_p;
325+
using cpp11::r_vector<T>::get_const_p;
317326
using cpp11::r_vector<T>::get_sexptype;
318327
};
319328
} // namespace writable
@@ -741,8 +750,25 @@ inline r_vector<T>::r_vector(SEXP&& data, bool is_altrep)
741750
: cpp11::r_vector<T>(data, is_altrep), capacity_(length_) {}
742751

743752
template <typename T>
744-
inline r_vector<T>::r_vector(const r_vector& rhs)
745-
: cpp11::r_vector<T>(safe[Rf_shallow_duplicate](rhs)), capacity_(rhs.capacity_) {}
753+
inline r_vector<T>::r_vector(const r_vector& rhs) {
754+
// We don't want to just pass through to the read-only constructor because we'd
755+
// have to convert to `SEXP` first, which could truncate, and then we'd still have
756+
// to shallow duplicate after that to ensure we have a duplicate, which can result in
757+
// too many copies (#369).
758+
//
759+
// Instead we take control of setting all fields to try and only duplicate 1 time.
760+
// We try and reclaim unused capacity during the duplication by only reserving up to
761+
// the `rhs.length_`. This is nice because if the user returns this object, the
762+
// truncation has already been done and they don't have to pay for another allocation.
763+
// Importantly, `reserve_data()` always duplicates even if there wasn't extra capacity,
764+
// which ensures we have our own copy.
765+
data_ = reserve_data(rhs.data_, rhs.is_altrep_, rhs.length_);
766+
protect_ = detail::store::insert(data_);
767+
is_altrep_ = ALTREP(data_);
768+
data_p_ = get_p(is_altrep_, data_);
769+
length_ = rhs.length_;
770+
capacity_ = rhs.length_;
771+
}
746772

747773
template <typename T>
748774
inline r_vector<T>::r_vector(r_vector&& rhs) {
@@ -987,7 +1013,7 @@ inline void r_vector<T>::reserve(R_xlen_t new_capacity) {
9871013
SEXP old_protect = protect_;
9881014

9891015
data_ = (data_ == R_NilValue) ? safe[Rf_allocVector](get_sexptype(), new_capacity)
990-
: safe[Rf_xlengthgets](data_, new_capacity);
1016+
: reserve_data(data_, is_altrep_, new_capacity);
9911017
protect_ = detail::store::insert(data_);
9921018
is_altrep_ = ALTREP(data_);
9931019
data_p_ = get_p(is_altrep_, data_);
@@ -1188,6 +1214,83 @@ inline typename r_vector<T>::iterator r_vector<T>::iterator::operator+(R_xlen_t
11881214
return it;
11891215
}
11901216

1217+
// Compared to `Rf_xlengthgets()`:
1218+
// - This always allocates, even if it is the same size, which is important when we use
1219+
// it in a constructor and need to ensure that it duplicates on the way in.
1220+
// - This copies over attributes with `Rf_copyMostAttrib()`, which is important when we
1221+
// use it in constructors and when we truncate right before returning from the `SEXP`
1222+
// operator.
1223+
// - This is more friendly to ALTREP `x`.
1224+
template <typename T>
1225+
inline SEXP r_vector<T>::reserve_data(SEXP x, bool is_altrep, R_xlen_t size) {
1226+
// Resize core data
1227+
SEXP out = PROTECT(resize_data(x, is_altrep, size));
1228+
1229+
// Resize names, if required
1230+
SEXP names = Rf_getAttrib(x, R_NamesSymbol);
1231+
if (names != R_NilValue) {
1232+
names = resize_names(names, size);
1233+
Rf_setAttrib(out, R_NamesSymbol, names);
1234+
}
1235+
1236+
// Copy over "most" attributes.
1237+
// Does not copy over names, dim, or dim names.
1238+
// Names are handled already. Dim and dim names should not be applicable,
1239+
// as this is a vector.
1240+
// Does not look like it would ever error in our use cases, so no `safe[]`.
1241+
Rf_copyMostAttrib(x, out);
1242+
1243+
UNPROTECT(1);
1244+
return out;
1245+
}
1246+
1247+
template <typename T>
1248+
inline SEXP r_vector<T>::resize_data(SEXP x, bool is_altrep, R_xlen_t size) {
1249+
underlying_type const* v_x = get_const_p(is_altrep, x);
1250+
1251+
SEXP out = PROTECT(safe[Rf_allocVector](get_sexptype(), size));
1252+
underlying_type* v_out = get_p(ALTREP(out), out);
1253+
1254+
const R_xlen_t x_size = Rf_xlength(x);
1255+
const R_xlen_t copy_size = (x_size > size) ? size : x_size;
1256+
1257+
// Copy over data from `x` up to `copy_size` (we could be truncating so don't blindly
1258+
// copy everything from `x`)
1259+
if (v_x != nullptr && v_out != nullptr) {
1260+
std::memcpy(v_out, v_x, copy_size * sizeof(underlying_type));
1261+
} else {
1262+
// Handles ALTREP `x` with no const pointer, VECSXP, STRSXP
1263+
for (R_xlen_t i = 0; i < copy_size; ++i) {
1264+
set_elt(out, i, get_elt(x, i));
1265+
}
1266+
}
1267+
1268+
UNPROTECT(1);
1269+
return out;
1270+
}
1271+
1272+
template <typename T>
1273+
inline SEXP r_vector<T>::resize_names(SEXP x, R_xlen_t size) {
1274+
const SEXP* v_x = STRING_PTR_RO(x);
1275+
1276+
SEXP out = PROTECT(safe[Rf_allocVector](STRSXP, size));
1277+
1278+
const R_xlen_t x_size = Rf_xlength(x);
1279+
const R_xlen_t copy_size = (x_size > size) ? size : x_size;
1280+
1281+
for (R_xlen_t i = 0; i < copy_size; ++i) {
1282+
SET_STRING_ELT(out, i, v_x[i]);
1283+
}
1284+
1285+
// Ensure remaining names are initialized to `""`
1286+
for (R_xlen_t i = copy_size; i < size; ++i) {
1287+
SET_STRING_ELT(out, i, R_BlankString);
1288+
}
1289+
1290+
UNPROTECT(1);
1291+
return out;
1292+
}
1293+
11911294
} // namespace writable
11921295

11931296
// TODO: is there a better condition we could use, e.g. assert something true

inst/include/cpp11/raws.hpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,12 @@ inline typename r_vector<uint8_t>::underlying_type r_vector<uint8_t>::get_elt(
3636
return RAW_ELT(x, i);
3737
}
3838

39+
template <>
40+
inline typename r_vector<uint8_t>::underlying_type const* r_vector<uint8_t>::get_const_p(
41+
bool is_altrep, SEXP data) {
42+
return RAW_OR_NULL(data);
43+
}
44+
3945
template <>
4046
inline typename r_vector<uint8_t>::underlying_type* r_vector<uint8_t>::get_p(
4147
bool is_altrep, SEXP data) {

inst/include/cpp11/strings.hpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,17 @@ inline typename r_vector<r_string>::underlying_type* r_vector<r_string>::get_p(b
3434
return nullptr;
3535
}
3636

37+
template <>
38+
inline typename r_vector<r_string>::underlying_type const*
39+
r_vector<r_string>::get_const_p(bool is_altrep, SEXP data) {
40+
// No `STRING_PTR_OR_NULL()`
41+
if (is_altrep) {
42+
return nullptr;
43+
} else {
44+
return STRING_PTR_RO(data);
45+
}
46+
}
47+
3748
template <>
3849
inline void r_vector<r_string>::get_region(SEXP x, R_xlen_t i, R_xlen_t n,
3950
typename r_vector::underlying_type* buf) {

0 commit comments

Comments
 (0)