1
- /* a30d2613dcfdef81475a9d1a349134d2d42722172fdaa7d5bb12ed2aa74b9596 (2.4.6 +)
1
+ /* fcb1a62fefa945567301146eb98e3ad3413e823a41c4378e84e8b6b6f308d824 (2.4.7 +)
2
2
__ __ _
3
3
___\ \/ /_ __ __ _| |_
4
4
/ _ \\ /| '_ \ / _` | __|
34
34
Copyright (c) 2019 Vadim Zeitlin <[email protected] >
35
35
Copyright (c) 2021 Dong-hee Na <[email protected] >
36
36
Copyright (c) 2022 Samanta Navarro <[email protected] >
37
+ Copyright (c) 2022 Jeffrey Walton <[email protected] >
37
38
Licensed under the MIT license:
38
39
39
40
Permission is hereby granted, free of charge, to any person obtaining
133
134
* BSD / macOS (including <10.7) (arc4random): HAVE_ARC4RANDOM, \
134
135
* libbsd (arc4random_buf): HAVE_ARC4RANDOM_BUF + HAVE_LIBBSD, \
135
136
* libbsd (arc4random): HAVE_ARC4RANDOM + HAVE_LIBBSD, \
136
- * Linux (including <3.17) / BSD / macOS (including <10.7) (/dev/urandom): XML_DEV_URANDOM, \
137
+ * Linux (including <3.17) / BSD / macOS (including <10.7) / Solaris >=8 (/dev/urandom): XML_DEV_URANDOM, \
137
138
* Windows >=Vista (rand_s): _WIN32. \
138
139
\
139
140
If insist on not using any of these, bypass this error by defining \
@@ -722,6 +723,7 @@ XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep) {
722
723
return XML_ParserCreate_MM (encodingName , NULL , tmp );
723
724
}
724
725
726
+ // "xml=http://www.w3.org/XML/1998/namespace"
725
727
static const XML_Char implicitContext []
726
728
= {ASCII_x , ASCII_m , ASCII_l , ASCII_EQUALS , ASCII_h ,
727
729
ASCII_t , ASCII_t , ASCII_p , ASCII_COLON , ASCII_SLASH ,
@@ -3704,12 +3706,124 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr,
3704
3706
return XML_ERROR_NONE ;
3705
3707
}
3706
3708
3709
+ static XML_Bool
3710
+ is_rfc3986_uri_char (XML_Char candidate ) {
3711
+ // For the RFC 3986 ANBF grammar see
3712
+ // https://datatracker.ietf.org/doc/html/rfc3986#appendix-A
3713
+
3714
+ switch (candidate ) {
3715
+ // From rule "ALPHA" (uppercase half)
3716
+ case 'A' :
3717
+ case 'B' :
3718
+ case 'C' :
3719
+ case 'D' :
3720
+ case 'E' :
3721
+ case 'F' :
3722
+ case 'G' :
3723
+ case 'H' :
3724
+ case 'I' :
3725
+ case 'J' :
3726
+ case 'K' :
3727
+ case 'L' :
3728
+ case 'M' :
3729
+ case 'N' :
3730
+ case 'O' :
3731
+ case 'P' :
3732
+ case 'Q' :
3733
+ case 'R' :
3734
+ case 'S' :
3735
+ case 'T' :
3736
+ case 'U' :
3737
+ case 'V' :
3738
+ case 'W' :
3739
+ case 'X' :
3740
+ case 'Y' :
3741
+ case 'Z' :
3742
+
3743
+ // From rule "ALPHA" (lowercase half)
3744
+ case 'a' :
3745
+ case 'b' :
3746
+ case 'c' :
3747
+ case 'd' :
3748
+ case 'e' :
3749
+ case 'f' :
3750
+ case 'g' :
3751
+ case 'h' :
3752
+ case 'i' :
3753
+ case 'j' :
3754
+ case 'k' :
3755
+ case 'l' :
3756
+ case 'm' :
3757
+ case 'n' :
3758
+ case 'o' :
3759
+ case 'p' :
3760
+ case 'q' :
3761
+ case 'r' :
3762
+ case 's' :
3763
+ case 't' :
3764
+ case 'u' :
3765
+ case 'v' :
3766
+ case 'w' :
3767
+ case 'x' :
3768
+ case 'y' :
3769
+ case 'z' :
3770
+
3771
+ // From rule "DIGIT"
3772
+ case '0' :
3773
+ case '1' :
3774
+ case '2' :
3775
+ case '3' :
3776
+ case '4' :
3777
+ case '5' :
3778
+ case '6' :
3779
+ case '7' :
3780
+ case '8' :
3781
+ case '9' :
3782
+
3783
+ // From rule "pct-encoded"
3784
+ case '%' :
3785
+
3786
+ // From rule "unreserved"
3787
+ case '-' :
3788
+ case '.' :
3789
+ case '_' :
3790
+ case '~' :
3791
+
3792
+ // From rule "gen-delims"
3793
+ case ':' :
3794
+ case '/' :
3795
+ case '?' :
3796
+ case '#' :
3797
+ case '[' :
3798
+ case ']' :
3799
+ case '@' :
3800
+
3801
+ // From rule "sub-delims"
3802
+ case '!' :
3803
+ case '$' :
3804
+ case '&' :
3805
+ case '\'' :
3806
+ case '(' :
3807
+ case ')' :
3808
+ case '*' :
3809
+ case '+' :
3810
+ case ',' :
3811
+ case ';' :
3812
+ case '=' :
3813
+ return XML_TRUE ;
3814
+
3815
+ default :
3816
+ return XML_FALSE ;
3817
+ }
3818
+ }
3819
+
3707
3820
/* addBinding() overwrites the value of prefix->binding without checking.
3708
3821
Therefore one must keep track of the old value outside of addBinding().
3709
3822
*/
3710
3823
static enum XML_Error
3711
3824
addBinding (XML_Parser parser , PREFIX * prefix , const ATTRIBUTE_ID * attId ,
3712
3825
const XML_Char * uri , BINDING * * bindingsPtr ) {
3826
+ // "http://www.w3.org/XML/1998/namespace"
3713
3827
static const XML_Char xmlNamespace []
3714
3828
= {ASCII_h , ASCII_t , ASCII_t , ASCII_p , ASCII_COLON ,
3715
3829
ASCII_SLASH , ASCII_SLASH , ASCII_w , ASCII_w , ASCII_w ,
@@ -3720,6 +3834,7 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
3720
3834
ASCII_e , ASCII_s , ASCII_p , ASCII_a , ASCII_c ,
3721
3835
ASCII_e , '\0' };
3722
3836
static const int xmlLen = (int )sizeof (xmlNamespace ) / sizeof (XML_Char ) - 1 ;
3837
+ // "http://www.w3.org/2000/xmlns/"
3723
3838
static const XML_Char xmlnsNamespace []
3724
3839
= {ASCII_h , ASCII_t , ASCII_t , ASCII_p , ASCII_COLON , ASCII_SLASH ,
3725
3840
ASCII_SLASH , ASCII_w , ASCII_w , ASCII_w , ASCII_PERIOD , ASCII_w ,
@@ -3760,14 +3875,26 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
3760
3875
&& (len > xmlnsLen || uri [len ] != xmlnsNamespace [len ]))
3761
3876
isXMLNS = XML_FALSE ;
3762
3877
3763
- // NOTE: While Expat does not validate namespace URIs against RFC 3986,
3764
- // we have to at least make sure that the XML processor on top of
3765
- // Expat (that is splitting tag names by namespace separator into
3766
- // 2- or 3-tuples (uri-local or uri-local-prefix)) cannot be confused
3767
- // by an attacker putting additional namespace separator characters
3768
- // into namespace declarations. That would be ambiguous and not to
3769
- // be expected.
3770
- if (parser -> m_ns && (uri [len ] == parser -> m_namespaceSeparator )) {
3878
+ // NOTE: While Expat does not validate namespace URIs against RFC 3986
3879
+ // today (and is not REQUIRED to do so with regard to the XML 1.0
3880
+ // namespaces specification) we have to at least make sure that
3881
+ // the application on top of Expat (that is likely splitting expanded
3882
+ // element names ("qualified names") of form
3883
+ // "[uri sep] local [sep prefix] '\0'" back into 1, 2 or 3 pieces
3884
+ // in its element handler code) cannot be confused by an attacker
3885
+ // putting additional namespace separator characters into namespace
3886
+ // declarations. That would be ambiguous and not to be expected.
3887
+ //
3888
+ // While the HTML API docs of function XML_ParserCreateNS have been
3889
+ // advising against use of a namespace separator character that can
3890
+ // appear in a URI for >20 years now, some widespread applications
3891
+ // are using URI characters (':' (colon) in particular) for a
3892
+ // namespace separator, in practice. To keep these applications
3893
+ // functional, we only reject namespace URIs containing the
3894
+ // application-chosen namespace separator if the chosen separator
3895
+ // is a non-URI character with regard to RFC 3986.
3896
+ if (parser -> m_ns && (uri [len ] == parser -> m_namespaceSeparator )
3897
+ && ! is_rfc3986_uri_char (uri [len ])) {
3771
3898
return XML_ERROR_SYNTAX ;
3772
3899
}
3773
3900
}
0 commit comments