@@ -72,6 +72,13 @@ def reset(self):
72
72
def close (self ):
73
73
"""Handle any buffered data."""
74
74
super ().close ()
75
+ if len (self .rawdata ):
76
+ # Temp fix for https://bugs.python.org/issue41989
77
+ # TODO: remove this when the bug is fixed in all supported Python versions.
78
+ if self .convert_charrefs and not self .cdata_elem : # pragma: no cover
79
+ self .handle_data (htmlparser .unescape (self .rawdata ))
80
+ else :
81
+ self .handle_data (self .rawdata )
75
82
# Handle any unclosed tags.
76
83
if len (self ._cache ):
77
84
self .cleandoc .append (self .md .htmlStash .store ('' .join (self ._cache )))
@@ -124,6 +131,9 @@ def handle_starttag(self, tag, attrs):
124
131
self ._cache .append (text )
125
132
else :
126
133
self .cleandoc .append (text )
134
+ if tag in self .CDATA_CONTENT_ELEMENTS :
135
+ # This is presumably a standalone tag in a code span (see #1036).
136
+ self .clear_cdata_mode ()
127
137
128
138
def handle_endtag (self , tag ):
129
139
text = self .get_endtag_text (tag )
@@ -200,3 +210,63 @@ def handle_pi(self, data):
200
210
def unknown_decl(self, data):
    """Forward an unknown `<![...` declaration to `handle_empty_tag` as a block.

    The declaration is rebuilt from `data`: a `CDATA[` section is closed
    with `]]>`, anything else with `]>`.
    """
    if data.startswith('CDATA['):
        closer = ']]>'
    else:
        closer = ']>'
    declaration = '<![{}{}'.format(data, closer)
    self.handle_empty_tag(declaration, is_block=True)
213
+
214
+ # The rest has been copied from base class in standard lib to address #1036.
215
+ # As __starttag_text is private, all references to it must be in this subclass.
216
+ # The last few lines of parse_starttag are reversed so that handle_starttag
217
+ # can override cdata_mode in certain situations (in a code span).
218
+ __starttag_text = None
219
+
220
def get_starttag_text(self):
    """Return the full source text of the last parsed start tag (`<...>`).

    The value is `None` until `parse_starttag` has recorded a tag.
    """
    starttag_text = self.__starttag_text
    return starttag_text
223
+
224
def parse_starttag(self, i):  # pragma: no cover
    """Parse the start tag that begins at index `i` of `self.rawdata`.

    This is a copy of the standard library's `HTMLParser.parse_starttag`
    with one deliberate difference: CDATA mode is switched on *before*
    `handle_starttag` is called rather than after, so that
    `handle_starttag` is able to switch it back off (see #1036).

    The raw tag text is recorded for `get_starttag_text`, the tag name is
    stored lower-cased in `self.lasttag`, and attributes are collected as
    `(lower-cased name, value)` pairs where a valueless attribute gets
    `None` and quoted values are unquoted and unescaped.

    Returns the index just past the tag, or the negative result of
    `check_for_whole_start_tag` when no complete tag is available.
    """
    self.__starttag_text = None
    endpos = self.check_for_whole_start_tag(i)
    if endpos < 0:
        # No complete start tag yet; report that to the caller unchanged.
        return endpos
    rawdata = self.rawdata
    self.__starttag_text = rawdata[i:endpos]

    # Now parse the data between i+1 and endpos into a tag and attrs.
    attrs = []
    match = htmlparser.tagfind_tolerant.match(rawdata, i + 1)
    assert match, 'unexpected call to parse_starttag()'
    k = match.end()
    self.lasttag = tag = match.group(1).lower()
    while k < endpos:
        m = htmlparser.attrfind_tolerant.match(rawdata, k)
        if not m:
            break
        attrname, rest, attrvalue = m.group(1, 2, 3)
        if not rest:
            # Attribute given without `=value`.
            attrvalue = None
        elif (attrvalue[:1] == "'" == attrvalue[-1:]
              or attrvalue[:1] == '"' == attrvalue[-1:]):
            # Strip the matching pair of surrounding quotes.
            attrvalue = attrvalue[1:-1]
        if attrvalue:
            attrvalue = htmlparser.unescape(attrvalue)
        attrs.append((attrname.lower(), attrvalue))
        k = m.end()

    end = rawdata[k:endpos].strip()
    if end not in (">", "/>"):
        # Junk between the last attribute and the closing bracket: pass the
        # whole thing through as plain text instead of treating it as a tag.
        self.handle_data(rawdata[i:endpos])
        return endpos
    if end.endswith('/>'):
        # XHTML-style empty tag: <span attr="value" />
        self.handle_startendtag(tag, attrs)
    else:
        # *** set cdata_mode first so we can override it in handle_starttag (see #1036) ***
        if tag in self.CDATA_CONTENT_ELEMENTS:
            self.set_cdata_mode(tag)
        self.handle_starttag(tag, attrs)
    return endpos
0 commit comments