Skip to content

PHPC-2269: Add internal iterator logic for MongoDB\BSON\Iterator #1454

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Aug 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
192 changes: 155 additions & 37 deletions src/BSON/Iterator.c
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,67 @@ static void php_phongo_iterator_build_current(php_phongo_iterator_t* intern)
phongo_bson_value_to_zval(bson_iter_value(&intern->iter), &intern->current);
}

static zval* php_phongo_iterator_get_current(php_phongo_iterator_t* intern)
{
if (!intern->valid) {
phongo_throw_exception(PHONGO_ERROR_LOGIC, "Cannot call current() on an exhausted iterator");
return NULL;
}

if (Z_ISUNDEF(intern->current)) {
php_phongo_iterator_build_current(intern);
}

return &intern->current;
}

static void php_phongo_iterator_next(php_phongo_iterator_t* intern)
{
intern->valid = bson_iter_next(&intern->iter);
intern->key++;
php_phongo_iterator_free_current(intern);
}

static bool php_phongo_iterator_key(php_phongo_iterator_t* intern, zval* key)
{
const char* ckey;

if (!intern->valid) {
phongo_throw_exception(PHONGO_ERROR_LOGIC, "Cannot call key() on an exhausted iterator");
return false;
}

if (intern->is_array) {
ZVAL_LONG(key, intern->key);

return true;
}

ckey = bson_iter_key(&intern->iter);
if (!bson_utf8_validate(ckey, strlen(ckey), false)) {
phongo_throw_exception(PHONGO_ERROR_UNEXPECTED_VALUE, "Detected corrupt BSON data at offset %d", intern->iter.off);
return false;
}

ZVAL_STRING(key, ckey);

return true;
}

static void php_phongo_iterator_rewind(php_phongo_iterator_t* intern)
{
/* Don't re-initialise iterator if we're still on the first item */
if (intern->key == 0) {
return;
}

php_phongo_iterator_free_current(intern);

bson_iter_init(&intern->iter, php_phongo_iterator_get_bson_from_zval(&intern->bson));
intern->key = 0;
intern->valid = bson_iter_next(&intern->iter);
}

static HashTable* php_phongo_iterator_get_properties_hash(phongo_compat_object_handler_type* object, bool is_temp)
{
php_phongo_iterator_t* intern;
Expand All @@ -113,48 +174,33 @@ PHONGO_DISABLED_WAKEUP(MongoDB_BSON_Iterator)
static PHP_METHOD(MongoDB_BSON_Iterator, current)
{
php_phongo_iterator_t* intern = Z_ITERATOR_OBJ_P(getThis());
zval* data;

PHONGO_PARSE_PARAMETERS_NONE();

if (!intern->valid) {
phongo_throw_exception(PHONGO_ERROR_LOGIC, "Cannot call current() on an exhausted iterator");
return;
}

if (Z_ISUNDEF(intern->current)) {
php_phongo_iterator_build_current(intern);
data = php_phongo_iterator_get_current(intern);
if (!data) {
// Exception already thrown
RETURN_NULL();
}

if (Z_ISUNDEF(intern->current)) {
if (Z_ISUNDEF_P(data)) {
RETURN_NULL();
} else {
ZVAL_COPY_DEREF(return_value, &intern->current);
ZVAL_COPY_DEREF(return_value, data);
}
}

static PHP_METHOD(MongoDB_BSON_Iterator, key)
{
const char* key;
php_phongo_iterator_t* intern = Z_ITERATOR_OBJ_P(getThis());

PHONGO_PARSE_PARAMETERS_NONE();

if (!intern->valid) {
phongo_throw_exception(PHONGO_ERROR_LOGIC, "Cannot call key() on an exhausted iterator");
if (!php_phongo_iterator_key(intern, return_value)) {
// Exception already thrown
return;
}

if (intern->is_array) {
RETURN_LONG(intern->key);
}

key = bson_iter_key(&intern->iter);
if (!bson_utf8_validate(key, strlen(key), false)) {
phongo_throw_exception(PHONGO_ERROR_UNEXPECTED_VALUE, "Detected corrupt BSON data at offset %d", intern->iter.off);
return;
}

RETURN_STRING(key);
}

static PHP_METHOD(MongoDB_BSON_Iterator, next)
Expand All @@ -163,9 +209,7 @@ static PHP_METHOD(MongoDB_BSON_Iterator, next)

PHONGO_PARSE_PARAMETERS_NONE();

intern->valid = bson_iter_next(&intern->iter);
intern->key++;
php_phongo_iterator_free_current(intern);
php_phongo_iterator_next(intern);
}

static PHP_METHOD(MongoDB_BSON_Iterator, valid)
Expand All @@ -183,16 +227,7 @@ static PHP_METHOD(MongoDB_BSON_Iterator, rewind)

PHONGO_PARSE_PARAMETERS_NONE();

/* Don't re-initialise iterator if we're still on the first item */
if (intern->key == 0) {
return;
}

php_phongo_iterator_free_current(intern);

bson_iter_init(&intern->iter, php_phongo_iterator_get_bson_from_zval(&intern->bson));
intern->key = 0;
intern->valid = bson_iter_next(&intern->iter);
php_phongo_iterator_rewind(intern);
}

void phongo_iterator_init(zval* return_value, zval* document_or_packedarray)
Expand Down Expand Up @@ -263,10 +298,93 @@ static HashTable* php_phongo_iterator_get_properties(phongo_compat_object_handle
return php_phongo_iterator_get_properties_hash(object, false);
}

/* Iterator handlers */
static void php_phongo_iterator_it_dtor(zend_object_iterator* iter)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This entire logic was shamelessly lifted from spl_array.c

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does "this entire logic" refer to all of the iterator functions in this file (not just dtor here)?

Copy link
Member Author

@alcaeus alcaeus Aug 25, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes - all of the php_phongo_iterator_it_* methods were heavily inspired by PHP's standard implementation.

{
zval_ptr_dtor(&iter->data);
}

static int php_phongo_iterator_it_valid(zend_object_iterator* iter)
{
php_phongo_iterator_t* intern = Z_ITERATOR_OBJ_P(&iter->data);

return intern->valid ? SUCCESS : FAILURE;
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm once again reminded how dumb PHP is for having SUCCESS defined as 0, when bools work exactly the other way around. Took me a moment to figure out why as I was initially returning intern->valid directly.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've been bitten by this before, but I've made peace with assuming PHP's behavior is inspired by process exit codes.

}

static zval* php_phongo_iterator_it_get_current_data(zend_object_iterator* iter)
{
php_phongo_iterator_t* intern = Z_ITERATOR_OBJ_P(&iter->data);

return php_phongo_iterator_get_current(intern);
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unlike the current() method itself, there's no need to copy/deref the zval we're returning here. Another notable difference to current() is that we're leaving an IS_UNDEF zval as-is, whereas current returns null in those cases. I don't think it makes a significant difference, and I'm not sure why Cursor::current() (which inspired the BSON iterator) returns null for an undef zval. I know there are some weird shenanigans going on when returning a IS_UNDEF, but I don't think this will ever matter in practice, as having that value returned indicates that something went seriously wrong during the iteration and there should've been an exception anyways.

}

static void php_phongo_iterator_it_get_current_key(zend_object_iterator* iter, zval* key)
{
php_phongo_iterator_t* intern = Z_ITERATOR_OBJ_P(&iter->data);

if (!php_phongo_iterator_key(intern, key)) {
// Exception already thrown
return;
}
}

static void php_phongo_iterator_it_move_forward(zend_object_iterator* iter)
{
php_phongo_iterator_t* intern = Z_ITERATOR_OBJ_P(&iter->data);

php_phongo_iterator_next(intern);
}

static void php_phongo_iterator_it_rewind(zend_object_iterator* iter)
{
php_phongo_iterator_t* intern = Z_ITERATOR_OBJ_P(&iter->data);

php_phongo_iterator_rewind(intern);
}

#if PHP_VERSION_ID >= 80000
static HashTable* php_phongo_iterator_it_get_gc(zend_object_iterator* iter, zval** table, int* n)
{
*n = 1;
*table = &iter->data;
return NULL;
}
#endif

static const zend_object_iterator_funcs php_phongo_iterator_it_funcs = PHONGO_ITERATOR_FUNCS(
php_phongo_iterator_it_dtor,
php_phongo_iterator_it_valid,
php_phongo_iterator_it_get_current_data,
php_phongo_iterator_it_get_current_key,
php_phongo_iterator_it_move_forward,
php_phongo_iterator_it_rewind,
NULL, /* invalidate_current */
php_phongo_iterator_it_get_gc);

static zend_object_iterator* php_phongo_iterator_get_iterator(zend_class_entry* ce, zval* object, int by_ref)
{
zend_object_iterator* iterator;

if (by_ref) {
phongo_throw_exception(PHONGO_ERROR_LOGIC, "An iterator cannot be used with foreach by reference");
return NULL;
}
Comment on lines +368 to +371
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is precedent for this in PHP itself, although they emit an E_ERROR in such cases. I opted to throw instead as it's more consistent how we expose errors.

We can't support iteration by reference as the BSON structures themselves are read-only, so assigning to the decoded value makes no sense.


iterator = emalloc(sizeof(zend_object_iterator));
zend_iterator_init(iterator);

ZVAL_OBJ_COPY(&iterator->data, Z_OBJ_P(object));

iterator->funcs = &php_phongo_iterator_it_funcs;

return iterator;
}

void php_phongo_iterator_init_ce(INIT_FUNC_ARGS)
{
php_phongo_iterator_ce = register_class_MongoDB_BSON_Iterator(zend_ce_iterator);
php_phongo_iterator_ce->create_object = php_phongo_iterator_create_object;
php_phongo_iterator_ce->get_iterator = php_phongo_iterator_get_iterator;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Quoting PHPC-2269's description:

If this speeds up iteration, we should consider adding the same logic to The MongoDB\Cursor class, which may suffer from the same performance overhead.

Given that this does appear to speed things up, would you like to create a separate issue for making a similar change to Cursor?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Created PHPC-2278.

PHONGO_CE_DISABLE_SERIALIZATION(php_phongo_iterator_ce);

memcpy(&php_phongo_handler_iterator, phongo_get_std_object_handlers(), sizeof(zend_object_handlers));
Expand Down
38 changes: 38 additions & 0 deletions src/phongo_compat.h
Original file line number Diff line number Diff line change
Expand Up @@ -303,4 +303,42 @@ zend_bool zend_array_is_list(zend_array* array);
typedef ZEND_RESULT_CODE zend_result;
#endif

/* get_gc iterator handler was added in PHP 8.0 */
#if PHP_VERSION_ID >= 80000
#define PHONGO_ITERATOR_FUNCS(dtor, valid, get_current_data, get_current_key, move_forward, rewind, invalidate_current, get_gc) \
{ \
(dtor), \
(valid), \
(get_current_data), \
(get_current_key), \
(move_forward), \
(rewind), \
(invalidate_current), \
(get_gc), \
}
#else /* PHP_VERSION_ID < 80000 */
#define PHONGO_ITERATOR_FUNCS(dtor, valid, get_current_data, get_current_key, move_forward, rewind, invalidate_current, get_gc) \
{ \
(dtor), \
(valid), \
(get_current_data), \
(get_current_key), \
(move_forward), \
(rewind), \
(invalidate_current), \
}
#endif /* PHP_VERSION_ID >= 80000 */

/* ZVAL_OBJ_COPY was added in PHP 8.0 */
#ifndef ZVAL_OBJ_COPY
#define ZVAL_OBJ_COPY(z, o) \
do { \
zval* __z = (z); \
zend_object* __o = (o); \
GC_ADDREF(__o); \
Z_OBJ_P(__z) = __o; \
Z_TYPE_INFO_P(__z) = IS_OBJECT_EX; \
} while (0)
#endif

#endif /* PHONGO_COMPAT_H */