@@ -179,37 +179,30 @@ def comments(self):
179
179
180
180
181
181
class UnstructuredTokenList(TokenList):
    # Token list tagged with the 'unstructured' token type; all behavior
    # is inherited from TokenList.
    token_type = 'unstructured'
184
183
185
184
186
185
class Phrase(TokenList):
    # Token list tagged with the 'phrase' token type; all behavior is
    # inherited from TokenList.
    token_type = 'phrase'
189
187
190
188
class Word(TokenList):
    # Token list tagged with the 'word' token type; all behavior is
    # inherited from TokenList.
    token_type = 'word'
193
190
194
191
195
192
class CFWSList(WhiteSpaceTokenList):
    # Whitespace token list tagged with the 'cfws' token type; all
    # behavior is inherited from WhiteSpaceTokenList.
    token_type = 'cfws'
198
194
199
195
200
196
class Atom(TokenList):
    # Token list tagged with the 'atom' token type; all behavior is
    # inherited from TokenList.
    token_type = 'atom'
203
198
204
199
205
200
class Token(TokenList):
    # Token list tagged with the 'token' token type.
    token_type = 'token'
    # Class-level flag, off for this token type.
    # NOTE(review): presumably consulted by the folding code to decide
    # whether the token is emitted as an encoded word -- confirm.
    encode_as_ew = False
209
203
210
204
211
205
class EncodedWord (TokenList ):
212
-
213
206
token_type = 'encoded-word'
214
207
cte = None
215
208
charset = None
@@ -496,16 +489,19 @@ def domain(self):
496
489
497
490
498
491
class DotAtom(TokenList):
    # Token list tagged with the 'dot-atom' token type; all behavior is
    # inherited from TokenList.
    token_type = 'dot-atom'
501
493
502
494
503
495
class DotAtomText(TokenList):
    # Token list tagged with the 'dot-atom-text' token type.
    token_type = 'dot-atom-text'
    # Class-level flag, on for this token type.
    # NOTE(review): presumably tells the folding code that encoded words
    # may be used for this token -- confirm against the folder.
    as_ew_allowed = True
507
498
508
499
500
class NoFoldLiteral(TokenList):
    # Token list tagged with the 'no-fold-literal' token type.
    token_type = 'no-fold-literal'
    # Class-level flag, off for this token type.
    # NOTE(review): presumably forbids encoded words when folding this
    # token -- confirm against the folder.
    as_ew_allowed = False
503
+
504
+
509
505
class AddrSpec (TokenList ):
510
506
511
507
token_type = 'addr-spec'
@@ -809,35 +805,42 @@ def params(self):
809
805
810
806
811
807
class ContentType(ParameterizedHeaderValue):
    # Parameterized header value tagged with the 'content-type' token type.
    token_type = 'content-type'
    # Class-level flag, off for this token type.
    # NOTE(review): presumably forbids encoded words when folding -- confirm.
    as_ew_allowed = False
    # Class-level main type and subtype values.
    # NOTE(review): presumably defaults that the content-type parser
    # overrides per instance -- confirm against the parser.
    maintype = 'text'
    subtype = 'plain'
817
812
818
813
819
814
class ContentDisposition(ParameterizedHeaderValue):
    # Parameterized header value tagged with the 'content-disposition'
    # token type.
    token_type = 'content-disposition'
    # Class-level flag, off for this token type.
    # NOTE(review): presumably forbids encoded words when folding -- confirm.
    as_ew_allowed = False
    # Class-level value is None.
    # NOTE(review): presumably filled in per instance by the parser -- confirm.
    content_disposition = None
824
818
825
819
826
820
class ContentTransferEncoding(TokenList):
    # Token list tagged with the 'content-transfer-encoding' token type.
    token_type = 'content-transfer-encoding'
    # Class-level flag, off for this token type.
    # NOTE(review): presumably forbids encoded words when folding -- confirm.
    as_ew_allowed = False
    # Class-level content transfer encoding value.
    # NOTE(review): presumably a default the parser overrides per
    # instance -- confirm against the parser.
    cte = '7bit'
831
824
832
825
833
826
class HeaderLabel(TokenList):
    # Token list tagged with the 'header-label' token type.
    token_type = 'header-label'
    # Class-level flag, off for this token type.
    # NOTE(review): presumably forbids encoded words when folding -- confirm.
    as_ew_allowed = False
837
829
838
830
839
- class Header (TokenList ):
831
class MsgID(TokenList):
    # Token list tagged with the 'msg-id' token type.
    token_type = 'msg-id'
    # Class-level flag, off for this token type.
    # NOTE(review): presumably forbids encoded words when folding -- confirm.
    as_ew_allowed = False

    def fold(self, policy):
        """Return the token's text followed by the policy's line separator.

        message-id tokens may not be folded, so the string form of the
        token list is emitted unchanged on a single line.
        """
        return "".join((str(self), policy.linesep))
838
+
839
class MessageID(MsgID):
    # Same as MsgID (including its non-folding fold()), but tagged with
    # the 'message-id' token type.
    token_type = 'message-id'
840
841
842
+
843
+ class Header (TokenList ):
841
844
token_type = 'header'
842
845
843
846
@@ -1583,7 +1586,7 @@ def get_addr_spec(value):
1583
1586
addr_spec .append (token )
1584
1587
if not value or value [0 ] != '@' :
1585
1588
addr_spec .defects .append (errors .InvalidHeaderDefect (
1586
- "add -spec local part with no domain" ))
1589
+ "addr -spec local part with no domain" ))
1587
1590
return addr_spec , value
1588
1591
addr_spec .append (ValueTerminal ('@' , 'address-at-symbol' ))
1589
1592
token , value = get_domain (value [1 :])
@@ -1968,6 +1971,110 @@ def get_address_list(value):
1968
1971
value = value [1 :]
1969
1972
return address_list , value
1970
1973
1974
+
1975
def get_no_fold_literal(value):
    """ no-fold-literal = "[" *dtext "]"

    Parse a bracketed literal from the start of value.  Return a
    (NoFoldLiteral, remaining_text) tuple.  Raise HeaderParseError if
    value is empty, does not begin with '[', or the dtext run is not
    followed by ']'.
    """
    literal = NoFoldLiteral()
    if not value:
        raise errors.HeaderParseError(
            "expected no-fold-literal but found '{}'".format(value))
    if value[0] != '[':
        raise errors.HeaderParseError(
            "expected '[' at the start of no-fold-literal "
            "but found '{}'".format(value))
    literal.append(ValueTerminal('[', 'no-fold-literal-start'))
    dtext, remainder = get_dtext(value[1:])
    literal.append(dtext)
    # A one-character slice is '' for an exhausted string, so this covers
    # both "nothing left" and "wrong closing character".
    if remainder[:1] != ']':
        raise errors.HeaderParseError(
            "expected ']' at the end of no-fold-literal "
            "but found '{}'".format(remainder))
    literal.append(ValueTerminal(']', 'no-fold-literal-end'))
    return literal, remainder[1:]
1996
+
1997
def get_msg_id(value):
    """msg-id = [CFWS] "<" id-left '@' id-right ">" [CFWS]
       id-left = dot-atom-text / obs-id-left
       id-right = dot-atom-text / no-fold-literal / obs-id-right
       no-fold-literal = "[" *dtext "]"

    Parse one msg-id from the start of value and return a
    (MsgID, remaining_text) tuple.

    Recoverable problems are recorded on the returned token's defects
    list instead of being raised: an obsolete id-left or id-right, a
    missing '@' id-right part, and a missing trailing '>'.

    Raise HeaderParseError if value (after optional leading CFWS) does
    not start with '<', or if id-left or id-right cannot be parsed in
    any accepted form.
    """
    msg_id = MsgID()
    # Check for emptiness first: indexing an empty string would raise
    # IndexError rather than the HeaderParseError callers handle.
    if value and value[0] in CFWS_LEADER:
        token, value = get_cfws(value)
        msg_id.append(token)
    if not value or value[0] != '<':
        raise errors.HeaderParseError(
            "expected msg-id but found '{}'".format(value))
    msg_id.append(ValueTerminal('<', 'msg-id-start'))
    value = value[1:]
    # Parse id-left.
    try:
        token, value = get_dot_atom_text(value)
    except errors.HeaderParseError:
        try:
            # obs-id-left is same as local-part of addr-spec.
            token, value = get_obs_local_part(value)
            msg_id.defects.append(errors.ObsoleteHeaderDefect(
                "obsolete id-left in msg-id"))
        except errors.HeaderParseError:
            raise errors.HeaderParseError(
                "expected dot-atom-text or obs-id-left"
                " but found '{}'".format(value))
    msg_id.append(token)
    if not value or value[0] != '@':
        msg_id.defects.append(errors.InvalidHeaderDefect(
            "msg-id with no id-right"))
        # Even though there is no id-right, if the local part
        # ends with `>` let's just parse it too and return
        # along with the defect.
        if value and value[0] == '>':
            msg_id.append(ValueTerminal('>', 'msg-id-end'))
            value = value[1:]
        return msg_id, value
    msg_id.append(ValueTerminal('@', 'address-at-symbol'))
    value = value[1:]
    # Parse id-right.
    try:
        token, value = get_dot_atom_text(value)
    except errors.HeaderParseError:
        try:
            token, value = get_no_fold_literal(value)
        except errors.HeaderParseError:
            try:
                token, value = get_domain(value)
                msg_id.defects.append(errors.ObsoleteHeaderDefect(
                    "obsolete id-right in msg-id"))
            except errors.HeaderParseError:
                raise errors.HeaderParseError(
                    "expected dot-atom-text, no-fold-literal or obs-id-right"
                    " but found '{}'".format(value))
    msg_id.append(token)
    if value and value[0] == '>':
        value = value[1:]
    else:
        msg_id.defects.append(errors.InvalidHeaderDefect(
            "missing trailing '>' on msg-id"))
    # The closing terminal is appended even when it was missing from the
    # input, so the string form of the token is always a bracketed msg-id.
    msg_id.append(ValueTerminal('>', 'msg-id-end'))
    if value and value[0] in CFWS_LEADER:
        token, value = get_cfws(value)
        msg_id.append(token)
    return msg_id, value
2064
+
2065
+
2066
def parse_message_id(value):
    """message-id = "Message-ID:" msg-id CRLF

    Parse value as the content of a Message-ID header and return a
    MessageID token list.

    On success the list holds the parsed msg-id token.  If get_msg_id
    raises HeaderParseError, the error is not propagated: an
    InvalidHeaderDefect is recorded on the returned MessageID and the
    unparsed text is kept as a single terminal so that str() of the
    result still yields the original header value.
    """
    message_id = MessageID()
    try:
        token, value = get_msg_id(value)
    except errors.HeaderParseError:
        message_id.defects.append(errors.InvalidHeaderDefect(
            "Expected msg-id but found {!r}".format(value)))
        # No token was produced on this path, so there is nothing parsed
        # to append; preserve the raw text instead of dropping it.
        if value:
            message_id.append(ValueTerminal(value, 'invalid-msg-id'))
    else:
        message_id.append(token)
    return message_id
2077
+
1971
2078
#
1972
2079
# XXX: As I begin to add additional header parsers, I'm realizing we probably
1973
2080
# have two level of parser routines: the get_XXX methods that get a token in
0 commit comments