1
- /* 2e2c8ce5f11a473d65ec313ab20ceee6afefb355f5405afc06e7204e2e41c8c0 (2.4.4 +)
1
+ /* a30d2613dcfdef81475a9d1a349134d2d42722172fdaa7d5bb12ed2aa74b9596 (2.4.6 +)
2
2
__ __ _
3
3
___\ \/ /_ __ __ _| |_
4
4
/ _ \\ /| '_ \ / _` | __|
11
11
Copyright (c) 2000-2006 Fred L. Drake, Jr. <[email protected] >
12
12
Copyright (c) 2001-2002 Greg Stein <[email protected] >
13
13
Copyright (c) 2002-2016 Karl Waclawek <[email protected] >
14
- Copyright (c) 2005-2009 Steven Solie <[email protected] >
14
+ Copyright (c) 2005-2009 Steven Solie <[email protected] >
15
15
Copyright (c) 2016 Eric Rahm <[email protected] >
16
16
Copyright (c) 2016-2022 Sebastian Pipping <[email protected] >
17
17
Copyright (c) 2016 Gaurav <[email protected] >
@@ -718,8 +718,7 @@ XML_ParserCreate(const XML_Char *encodingName) {
718
718
719
719
XML_Parser XMLCALL
720
720
XML_ParserCreateNS (const XML_Char * encodingName , XML_Char nsSep ) {
721
- XML_Char tmp [2 ];
722
- * tmp = nsSep ;
721
+ XML_Char tmp [2 ] = {nsSep , 0 };
723
722
return XML_ParserCreate_MM (encodingName , NULL , tmp );
724
723
}
725
724
@@ -1344,8 +1343,7 @@ XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context,
1344
1343
would be otherwise.
1345
1344
*/
1346
1345
if (parser -> m_ns ) {
1347
- XML_Char tmp [2 ];
1348
- * tmp = parser -> m_namespaceSeparator ;
1346
+ XML_Char tmp [2 ] = {parser -> m_namespaceSeparator , 0 };
1349
1347
parser = parserCreate (encodingName , & parser -> m_mem , tmp , newDtd );
1350
1348
} else {
1351
1349
parser = parserCreate (encodingName , & parser -> m_mem , NULL , newDtd );
@@ -2563,6 +2561,7 @@ storeRawNames(XML_Parser parser) {
2563
2561
while (tag ) {
2564
2562
int bufSize ;
2565
2563
int nameLen = sizeof (XML_Char ) * (tag -> name .strLen + 1 );
2564
+ size_t rawNameLen ;
2566
2565
char * rawNameBuf = tag -> buf + nameLen ;
2567
2566
/* Stop if already stored. Since m_tagStack is a stack, we can stop
2568
2567
at the first entry that has already been copied; everything
@@ -2574,7 +2573,11 @@ storeRawNames(XML_Parser parser) {
2574
2573
/* For re-use purposes we need to ensure that the
2575
2574
size of tag->buf is a multiple of sizeof(XML_Char).
2576
2575
*/
2577
- bufSize = nameLen + ROUND_UP (tag -> rawNameLength , sizeof (XML_Char ));
2576
+ rawNameLen = ROUND_UP (tag -> rawNameLength , sizeof (XML_Char ));
2577
+ /* Detect and prevent integer overflow. */
2578
+ if (rawNameLen > (size_t )INT_MAX - nameLen )
2579
+ return XML_FALSE ;
2580
+ bufSize = nameLen + (int )rawNameLen ;
2578
2581
if (bufSize > tag -> bufEnd - tag -> buf ) {
2579
2582
char * temp = (char * )REALLOC (parser , tag -> buf , bufSize );
2580
2583
if (temp == NULL )
@@ -3756,6 +3759,17 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
3756
3759
if (! mustBeXML && isXMLNS
3757
3760
&& (len > xmlnsLen || uri [len ] != xmlnsNamespace [len ]))
3758
3761
isXMLNS = XML_FALSE ;
3762
+
3763
+ // NOTE: While Expat does not validate namespace URIs against RFC 3986,
3764
+ // we have to at least make sure that the XML processor on top of
3765
+ // Expat (that is splitting tag names by namespace separator into
3766
+ // 2- or 3-tuples (uri-local or uri-local-prefix)) cannot be confused
3767
+ // by an attacker putting additional namespace separator characters
3768
+ // into namespace declarations. That would be ambiguous and not to
3769
+ // be expected.
3770
+ if (parser -> m_ns && (uri [len ] == parser -> m_namespaceSeparator )) {
3771
+ return XML_ERROR_SYNTAX ;
3772
+ }
3759
3773
}
3760
3774
isXML = isXML && len == xmlLen ;
3761
3775
isXMLNS = isXMLNS && len == xmlnsLen ;
@@ -7317,44 +7331,15 @@ nextScaffoldPart(XML_Parser parser) {
7317
7331
return next ;
7318
7332
}
7319
7333
7320
- static void
7321
- build_node (XML_Parser parser , int src_node , XML_Content * dest ,
7322
- XML_Content * * contpos , XML_Char * * strpos ) {
7323
- DTD * const dtd = parser -> m_dtd ; /* save one level of indirection */
7324
- dest -> type = dtd -> scaffold [src_node ].type ;
7325
- dest -> quant = dtd -> scaffold [src_node ].quant ;
7326
- if (dest -> type == XML_CTYPE_NAME ) {
7327
- const XML_Char * src ;
7328
- dest -> name = * strpos ;
7329
- src = dtd -> scaffold [src_node ].name ;
7330
- for (;;) {
7331
- * (* strpos )++ = * src ;
7332
- if (! * src )
7333
- break ;
7334
- src ++ ;
7335
- }
7336
- dest -> numchildren = 0 ;
7337
- dest -> children = NULL ;
7338
- } else {
7339
- unsigned int i ;
7340
- int cn ;
7341
- dest -> numchildren = dtd -> scaffold [src_node ].childcnt ;
7342
- dest -> children = * contpos ;
7343
- * contpos += dest -> numchildren ;
7344
- for (i = 0 , cn = dtd -> scaffold [src_node ].firstchild ; i < dest -> numchildren ;
7345
- i ++ , cn = dtd -> scaffold [cn ].nextsib ) {
7346
- build_node (parser , cn , & (dest -> children [i ]), contpos , strpos );
7347
- }
7348
- dest -> name = NULL ;
7349
- }
7350
- }
7351
-
7352
7334
static XML_Content *
7353
7335
build_model (XML_Parser parser ) {
7336
+ /* Function build_model transforms the existing parser->m_dtd->scaffold
7337
+ * array of CONTENT_SCAFFOLD tree nodes into a new array of
7338
+ * XML_Content tree nodes followed by a gapless list of zero-terminated
7339
+ * strings. */
7354
7340
DTD * const dtd = parser -> m_dtd ; /* save one level of indirection */
7355
7341
XML_Content * ret ;
7356
- XML_Content * cpos ;
7357
- XML_Char * str ;
7342
+ XML_Char * str ; /* the current string writing location */
7358
7343
7359
7344
/* Detect and prevent integer overflow.
7360
7345
* The preprocessor guard addresses the "always false" warning
@@ -7380,10 +7365,96 @@ build_model(XML_Parser parser) {
7380
7365
if (! ret )
7381
7366
return NULL ;
7382
7367
7383
- str = (XML_Char * )(& ret [dtd -> scaffCount ]);
7384
- cpos = & ret [1 ];
7368
+ /* What follows is an iterative implementation (of what was previously done
7369
+ * recursively in a dedicated function called "build_node". The old recursive
7370
+ * build_node could be forced into stack exhaustion from input as small as a
7371
+ * few megabytes, and so that was a security issue. Hence, a function call
7372
+ * stack is avoided now by resolving recursion.)
7373
+ *
7374
+ * The iterative approach works as follows:
7375
+ *
7376
+ * - We have two writing pointers, both walking up the result array; one does
7377
+ * the work, the other creates "jobs" for its colleague to do, and leads
7378
+ * the way:
7379
+ *
7380
+ * - The faster one, pointer jobDest, always leads and writes "what job
7381
+ * to do" for the other, once it reaches that place in the
7382
+ * array: leader "jobDest" stores the source node array index (relative
7383
+ * to array dtd->scaffold) in field "numchildren".
7384
+ *
7385
+ * - The slower one, pointer dest, looks at the value stored in the
7386
+ * "numchildren" field (which actually holds a source node array index
7387
+ * at that time) and puts the real data from dtd->scaffold in.
7388
+ *
7389
+ * - Before the loop starts, jobDest writes source array index 0
7390
+ * (where the root node is located) so that dest will have something to do
7391
+ * when it starts operation.
7392
+ *
7393
+ * - Whenever nodes with children are encountered, jobDest appends
7394
+ * them as new jobs, in order. As a result, tree node siblings are
7395
+ * adjacent in the resulting array, for example:
7396
+ *
7397
+ * [0] root, has two children
7398
+ * [1] first child of 0, has three children
7399
+ * [3] first child of 1, does not have children
7400
+ * [4] second child of 1, does not have children
7401
+ * [5] third child of 1, does not have children
7402
+ * [2] second child of 0, does not have children
7403
+ *
7404
+ * Or (the same data) presented in flat array view:
7405
+ *
7406
+ * [0] root, has two children
7407
+ *
7408
+ * [1] first child of 0, has three children
7409
+ * [2] second child of 0, does not have children
7410
+ *
7411
+ * [3] first child of 1, does not have children
7412
+ * [4] second child of 1, does not have children
7413
+ * [5] third child of 1, does not have children
7414
+ *
7415
+ * - The algorithm repeats until all target array indices have been processed.
7416
+ */
7417
+ XML_Content * dest = ret ; /* tree node writing location, moves upwards */
7418
+ XML_Content * const destLimit = & ret [dtd -> scaffCount ];
7419
+ XML_Content * jobDest = ret ; /* next free writing location in target array */
7420
+ str = (XML_Char * )& ret [dtd -> scaffCount ];
7421
+
7422
+ /* Add the starting job, the root node (index 0) of the source tree */
7423
+ (jobDest ++ )-> numchildren = 0 ;
7424
+
7425
+ for (; dest < destLimit ; dest ++ ) {
7426
+ /* Retrieve source tree array index from job storage */
7427
+ const int src_node = (int )dest -> numchildren ;
7428
+
7429
+ /* Convert item */
7430
+ dest -> type = dtd -> scaffold [src_node ].type ;
7431
+ dest -> quant = dtd -> scaffold [src_node ].quant ;
7432
+ if (dest -> type == XML_CTYPE_NAME ) {
7433
+ const XML_Char * src ;
7434
+ dest -> name = str ;
7435
+ src = dtd -> scaffold [src_node ].name ;
7436
+ for (;;) {
7437
+ * str ++ = * src ;
7438
+ if (! * src )
7439
+ break ;
7440
+ src ++ ;
7441
+ }
7442
+ dest -> numchildren = 0 ;
7443
+ dest -> children = NULL ;
7444
+ } else {
7445
+ unsigned int i ;
7446
+ int cn ;
7447
+ dest -> name = NULL ;
7448
+ dest -> numchildren = dtd -> scaffold [src_node ].childcnt ;
7449
+ dest -> children = jobDest ;
7450
+
7451
+ /* Append scaffold indices of children to array */
7452
+ for (i = 0 , cn = dtd -> scaffold [src_node ].firstchild ;
7453
+ i < dest -> numchildren ; i ++ , cn = dtd -> scaffold [cn ].nextsib )
7454
+ (jobDest ++ )-> numchildren = (unsigned int )cn ;
7455
+ }
7456
+ }
7385
7457
7386
- build_node (parser , 0 , ret , & cpos , & str );
7387
7458
return ret ;
7388
7459
}
7389
7460
@@ -7412,7 +7483,7 @@ getElementType(XML_Parser parser, const ENCODING *enc, const char *ptr,
7412
7483
7413
7484
static XML_Char *
7414
7485
copyString (const XML_Char * s , const XML_Memory_Handling_Suite * memsuite ) {
7415
- int charsRequired = 0 ;
7486
+ size_t charsRequired = 0 ;
7416
7487
XML_Char * result ;
7417
7488
7418
7489
/* First determine how long the string is */
0 commit comments