Skip to content

Commit acf2762

Browse files
committed
[RFC] Add stream open functions to XML{Reader,Writer}
RFC: https://wiki.php.net/rfc/xmlreader_writer_streams
1 parent ca3b248 commit acf2762

22 files changed

+592
-42
lines changed

NEWS

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -324,6 +324,10 @@ PHP NEWS
324324

325325
- XMLReader:
326326
. Declares class constant types. (Ayesh)
327+
. Add XMLReader::fromStream(). (nielsdos)
328+
329+
- XMLWriter:
330+
. Add XMLWriter::toStream(). (nielsdos)
327331

328332
- XSL:
329333
. Implement request #64137 (XSLTProcessor::setParameter() should allow both

UPGRADING

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,15 @@ PHP 8.4 UPGRADE NOTES
178178
Passing an empty string to disable the handler is still allowed,
179179
but not recommended.
180180

181+
- XMLReader:
182+
. Passing an invalid character encoding to XMLReader::open() or
183+
XMLReader::XML() now throws a ValueError. Passing an encoding containing null
184+
bytes previously emitted a warning and now throws a ValueError as well.
185+
186+
- XMLWriter:
187+
. Passing a string containing null bytes previously emitted a warning and
188+
now throws a ValueError.
189+
181190
- XSL:
182191
. XSLTProcessor::setParameter() will now throw a ValueError when its arguments
183192
contain null bytes. This never actually worked correctly in the first place,
@@ -619,6 +628,14 @@ PHP 8.4 UPGRADE NOTES
619628
array_any().
620629
RFC: https://wiki.php.net/rfc/array_find
621630

631+
- XMLReader:
632+
. Added XMLReader::fromStream().
633+
RFC: https://wiki.php.net/rfc/xmlreader_writer_streams
634+
635+
- XMLWriter:
636+
. Added XMLWriter::toStream().
637+
RFC: https://wiki.php.net/rfc/xmlreader_writer_streams
638+
622639
- XSL:
623640
. Added XSLTProcessor::registerPhpFunctionNS().
624641
RFC: https://wiki.php.net/rfc/improve_callbacks_dom_and_xsl

ext/xmlreader/php_xmlreader.c

Lines changed: 95 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -167,8 +167,7 @@ static zend_function *xmlreader_get_method(zend_object **obj, zend_string *name,
167167
/* There are only two static internal methods and they both have overrides. */
168168
if (ZSTR_LEN(name) == sizeof("xml") - 1) {
169169
return (zend_function *) &xmlreader_xml_fn;
170-
} else {
171-
ZEND_ASSERT(ZSTR_LEN(name) == sizeof("open") - 1);
170+
} else if (ZSTR_LEN(name) == sizeof("open") - 1) {
172171
return (zend_function *) &xmlreader_open_fn;
173172
}
174173
}
@@ -799,6 +798,22 @@ PHP_METHOD(XMLReader, next)
799798
}
800799
/* }}} */
801800

801+
static bool xmlreader_valid_encoding(const char *encoding)
802+
{
803+
if (!encoding) {
804+
return true;
805+
}
806+
807+
/* Normally we could use xmlTextReaderConstEncoding() afterwards but libxml2 < 2.12.0 has a bug of course
808+
* where it returns NULL for some valid encodings instead. */
809+
xmlCharEncodingHandlerPtr handler = xmlFindCharEncodingHandler(encoding);
810+
if (!handler) {
811+
return false;
812+
}
813+
xmlCharEncCloseFunc(handler);
814+
return true;
815+
}
816+
802817
/* {{{ Sets the URI that the XMLReader will parse. */
803818
PHP_METHOD(XMLReader, open)
804819
{
@@ -811,7 +826,7 @@ PHP_METHOD(XMLReader, open)
811826
char resolved_path[MAXPATHLEN + 1];
812827
xmlTextReaderPtr reader = NULL;
813828

814-
if (zend_parse_parameters(ZEND_NUM_ARGS(), "p|s!l", &source, &source_len, &encoding, &encoding_len, &options) == FAILURE) {
829+
if (zend_parse_parameters(ZEND_NUM_ARGS(), "p|p!l", &source, &source_len, &encoding, &encoding_len, &options) == FAILURE) {
815830
RETURN_THROWS();
816831
}
817832

@@ -827,9 +842,9 @@ PHP_METHOD(XMLReader, open)
827842
RETURN_THROWS();
828843
}
829844

830-
if (encoding && CHECK_NULL_PATH(encoding, encoding_len)) {
831-
php_error_docref(NULL, E_WARNING, "Encoding must not contain NUL bytes");
832-
RETURN_FALSE;
845+
if (!xmlreader_valid_encoding(encoding)) {
846+
zend_argument_value_error(2, "must be a valid character encoding");
847+
RETURN_THROWS();
833848
}
834849

835850
valid_file = _xmlreader_get_valid_file_path(source, resolved_path, MAXPATHLEN );
@@ -859,6 +874,76 @@ PHP_METHOD(XMLReader, open)
859874
}
860875
/* }}} */
861876

877+
static int xml_reader_stream_read(void *context, char *buffer, int len)
878+
{
879+
zend_resource *resource = context;
880+
if (EXPECTED(resource->ptr)) {
881+
php_stream *stream = resource->ptr;
882+
return php_stream_read(stream, buffer, len);
883+
}
884+
return -1;
885+
}
886+
887+
static int xml_reader_stream_close(void *context)
888+
{
889+
zend_resource *resource = context;
890+
/* Don't close it as others may still use it! We don't own the resource!
891+
* Just delete our reference (and clean up if we're the last one). */
892+
zend_list_delete(resource);
893+
return 0;
894+
}
895+
896+
PHP_METHOD(XMLReader, fromStream)
897+
{
898+
zval *stream_zv;
899+
php_stream *stream;
900+
char *document_uri = NULL;
901+
char *encoding_name = NULL;
902+
size_t document_uri_len, encoding_name_len;
903+
zend_long flags = 0;
904+
905+
ZEND_PARSE_PARAMETERS_START(1, 4)
906+
Z_PARAM_RESOURCE(stream_zv);
907+
Z_PARAM_OPTIONAL
908+
Z_PARAM_PATH_OR_NULL(encoding_name, encoding_name_len)
909+
Z_PARAM_LONG(flags)
910+
Z_PARAM_PATH_OR_NULL(document_uri, document_uri_len)
911+
ZEND_PARSE_PARAMETERS_END();
912+
913+
php_stream_from_res(stream, Z_RES_P(stream_zv));
914+
915+
if (!xmlreader_valid_encoding(encoding_name)) {
916+
zend_argument_value_error(2, "must be a valid character encoding");
917+
RETURN_THROWS();
918+
}
919+
920+
PHP_LIBXML_SANITIZE_GLOBALS(reader_for_stream);
921+
xmlTextReaderPtr reader = xmlReaderForIO(
922+
xml_reader_stream_read,
923+
xml_reader_stream_close,
924+
stream->res,
925+
document_uri,
926+
encoding_name,
927+
flags
928+
);
929+
PHP_LIBXML_RESTORE_GLOBALS(reader_for_stream);
930+
931+
if (UNEXPECTED(reader == NULL)) {
932+
zend_throw_error(NULL, "Could not construct libxml reader");
933+
RETURN_THROWS();
934+
}
935+
936+
/* When the reader is closed (even in error paths) the reference is destroyed. */
937+
Z_ADDREF_P(stream_zv);
938+
939+
if (object_init_with_constructor(return_value, Z_CE_P(ZEND_THIS), 0, NULL, NULL) == SUCCESS) {
940+
xmlreader_object *intern = Z_XMLREADER_P(return_value);
941+
intern->ptr = reader;
942+
} else {
943+
xmlFreeTextReader(reader);
944+
}
945+
}
946+
862947
/* Not Yet Implemented in libxml - functions exist just not coded
863948
PHP_METHOD(XMLReader, resetState)
864949
{
@@ -995,7 +1080,7 @@ PHP_METHOD(XMLReader, XML)
9951080
xmlParserInputBufferPtr inputbfr;
9961081
xmlTextReaderPtr reader;
9971082

998-
if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|s!l", &source, &source_len, &encoding, &encoding_len, &options) == FAILURE) {
1083+
if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|p!l", &source, &source_len, &encoding, &encoding_len, &options) == FAILURE) {
9991084
RETURN_THROWS();
10001085
}
10011086

@@ -1011,9 +1096,9 @@ PHP_METHOD(XMLReader, XML)
10111096
RETURN_THROWS();
10121097
}
10131098

1014-
if (encoding && CHECK_NULL_PATH(encoding, encoding_len)) {
1015-
php_error_docref(NULL, E_WARNING, "Encoding must not contain NUL bytes");
1016-
RETURN_FALSE;
1099+
if (!xmlreader_valid_encoding(encoding)) {
1100+
zend_argument_value_error(2, "must be a valid character encoding");
1101+
RETURN_THROWS();
10171102
}
10181103

10191104
inputbfr = xmlParserInputBufferCreateMem(source, source_len, XML_CHAR_ENCODING_NONE);

ext/xmlreader/php_xmlreader.stub.php

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,9 @@ public function next(?string $name = null): bool {}
175175
/** @return bool|XMLReader */
176176
public static function open(string $uri, ?string $encoding = null, int $flags = 0) {} // TODO Return type shouldn't be dependent on the call scope
177177

178+
/** @param resource $stream */
179+
public static function fromStream($stream, ?string $encoding = null, int $flags = 0, ?string $documentUri = null): static {}
180+
178181
/** @tentative-return-type */
179182
public function readInnerXml(): string {}
180183

ext/xmlreader/php_xmlreader_arginfo.h

Lines changed: 10 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

ext/xmlreader/tests/bug73246.phpt

Lines changed: 25 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,27 @@
1-
--TEST--
2-
Bug #73246 (XMLReader: encoding length not checked)
1+
--TEST--
2+
Bug #73246 (XMLReader: encoding length not checked)
33
--EXTENSIONS--
44
xmlreader
5-
--FILE--
6-
<?php
7-
$reader = new XMLReader();
8-
$reader->open(__FILE__, "UTF\0-8");
9-
$reader->XML('<?xml version="1.0"?><root/>', "UTF\0-8");
10-
?>
11-
--EXPECTF--
12-
Warning: XMLReader::open(): Encoding must not contain NUL bytes in %s on line %d
13-
14-
Warning: XMLReader::XML(): Encoding must not contain NUL bytes in %s on line %d
5+
--FILE--
6+
<?php
7+
// Each call below passes an encoding with an embedded "\0" byte. The expected
// output lists one ValueError message per call, all reporting that
// Argument #2 ($encoding) must not contain any null bytes.
$reader = new XMLReader();
8+
try {
9+
$reader->open(__FILE__, "UTF\0-8");
10+
} catch (ValueError $e) {
11+
echo $e->getMessage(), "\n";
12+
}
13+
try {
14+
$reader->XML('<?xml version="1.0"?><root/>', "UTF\0-8");
15+
} catch (ValueError $e) {
16+
echo $e->getMessage(), "\n";
17+
}
18+
try {
19+
// Same check for the stream-based factory, using a named argument.
XMLReader::fromStream(fopen('php://memory', 'r'), encoding: "UTF\0-8");
20+
} catch (ValueError $e) {
21+
echo $e->getMessage(), "\n";
22+
}
23+
?>
24+
--EXPECT--
25+
XMLReader::open(): Argument #2 ($encoding) must not contain any null bytes
26+
XMLReader::XML(): Argument #2 ($encoding) must not contain any null bytes
27+
XMLReader::fromStream(): Argument #2 ($encoding) must not contain any null bytes
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
--TEST--
2+
XMLReader::fromStream() - broken stream
3+
--EXTENSIONS--
4+
xmlreader
5+
--FILE--
6+
<?php
7+
// Start with an incomplete document in an in-memory stream: <root> is opened
// and a comment follows, but the root element is never closed.
$h = fopen("php://memory", "w+");
8+
fwrite($h, "<root><!--my comment-->");
9+
fseek($h, 0);
10+
11+
$reader = XMLReader::fromStream($h, encoding: "UTF-8");
12+
$start = true;
13+
// Diagnostics from read() are silenced with @; the loop stops at the first
// falsy result.
while ($result = @$reader->read()) {
14+
var_dump($result);
15+
switch ($reader->nodeType) {
16+
case XMLReader::ELEMENT:
17+
echo "Element: ", $reader->name, "\n";
18+
break;
19+
case XMLReader::COMMENT:
20+
echo "Comment: ", $reader->value, "\n";
21+
break;
22+
}
23+
24+
// After the first successful read, append the rest of the document and close
// the stream while the reader is still attached to it. The expected output
// contains only the root element and the comment, never <child/>.
if ($start) {
25+
fwrite($h, "<child/></root>");
26+
fclose($h);
27+
$start = false;
28+
}
29+
}
30+
// Expected to print int(1).
var_dump($reader->depth);
31+
?>
32+
--EXPECT--
33+
bool(true)
34+
Element: root
35+
bool(true)
36+
Comment: my comment
37+
int(1)
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
--TEST--
2+
XMLReader::fromStream() - custom constructor
3+
--EXTENSIONS--
4+
xmlreader
5+
--FILE--
6+
<?php
7+
// Subclass with its own constructor. The expected output starts with
// "hello world" and dumps a CustomXMLReader whose myField is 1234, showing
// that fromStream() runs this constructor.
class CustomXMLReader extends XMLReader {
8+
public int $myField;
9+
10+
public function __construct() {
11+
$this->myField = 1234;
12+
echo "hello world\n";
13+
}
14+
}
15+
16+
// Minimal single-element document in an in-memory stream, rewound to the start.
$h = fopen("php://memory", "w+");
17+
fwrite($h, "<root/>");
18+
fseek($h, 0);
19+
20+
// Called on the subclass, so the returned object is a CustomXMLReader.
$reader = CustomXMLReader::fromStream($h, encoding: "UTF-8");
21+
var_dump($reader);
22+
var_dump($reader->read());
23+
var_dump($reader->nodeType);
24+
25+
fclose($h);
26+
?>
27+
--EXPECTF--
28+
hello world
29+
object(CustomXMLReader)#%d (1) {
30+
["attributeCount"]=>
31+
uninitialized(int)
32+
["baseURI"]=>
33+
uninitialized(string)
34+
["depth"]=>
35+
uninitialized(int)
36+
["hasAttributes"]=>
37+
uninitialized(bool)
38+
["hasValue"]=>
39+
uninitialized(bool)
40+
["isDefault"]=>
41+
uninitialized(bool)
42+
["isEmptyElement"]=>
43+
uninitialized(bool)
44+
["localName"]=>
45+
uninitialized(string)
46+
["name"]=>
47+
uninitialized(string)
48+
["namespaceURI"]=>
49+
uninitialized(string)
50+
["nodeType"]=>
51+
uninitialized(int)
52+
["prefix"]=>
53+
uninitialized(string)
54+
["value"]=>
55+
uninitialized(string)
56+
["xmlLang"]=>
57+
uninitialized(string)
58+
["myField"]=>
59+
int(1234)
60+
}
61+
bool(true)
62+
int(1)

0 commit comments

Comments
 (0)