@@ -98,8 +98,8 @@ class HTMLParser(_markupbase.ParserBase):
9898 containing respectively the named or numeric reference as the
9999 argument.
100100 """
101-
102- CDATA_CONTENT_ELEMENTS = ("script" , "style" )
101+ # Escapable raw text elements (textarea and title) reuse CDATA mode; unlike script and style, their character references are still unescaped when convert_charrefs is true
102+ CDATA_CONTENT_ELEMENTS = ("script" , "style" , "textarea" , "title" )
103103
104104 def __init__ (self , * , convert_charrefs = True ):
105105 """Initialize and reset this instance.
@@ -117,6 +117,7 @@ def reset(self):
117117 self .lasttag = '???'
118118 self .interesting = interesting_normal
119119 self .cdata_elem = None
120+ self ._raw_escapable = False
120121 super ().reset ()
121122
122123 def feed (self , data ):
@@ -140,11 +141,16 @@ def get_starttag_text(self):
140141
def set_cdata_mode(self, elem):
    """Enter CDATA mode for the content of element *elem*.

    The lower-cased tag name is stored in ``self.cdata_elem`` and
    ``self.interesting`` is replaced by a pattern for this element's
    content.

    For the escapable raw text elements (``textarea`` and ``title``) the
    ``_raw_escapable`` flag is set and the pattern matches either an
    ampersand (the start of a character reference) or the element's own
    end tag.  For every other element only the end tag is matched.
    """
    self.cdata_elem = elem.lower()
    # Assign the flag on every call so a stale True cannot survive a
    # direct switch from an escapable element to script/style.
    self._raw_escapable = self.cdata_elem in ("textarea", "title")
    end_tag = r'</\s*%s\s*>' % self.cdata_elem
    if self._raw_escapable:
        # Matching '&' alone would never stop at the closing tag, so a
        # scan driven by this pattern could not leave the element.
        self.interesting = re.compile('&|' + end_tag, re.I)
    else:
        self.interesting = re.compile(end_tag, re.I)
144149
def clear_cdata_mode(self):
    """Leave CDATA mode and restore the normal scanning state."""
    # Forget the current element and its escapable flag together.
    self.cdata_elem, self._raw_escapable = None, False
    # Go back to the default pattern used outside CDATA content.
    self.interesting = interesting_normal
148154
149155 # Internal -- handle data as far as reasonable. May leave state
150156 # and data to be processed by a subsequent call. If 'end' is
@@ -154,7 +160,7 @@ def goahead(self, end):
154160 i = 0
155161 n = len (rawdata )
156162 while i < n :
157- if self .convert_charrefs and not self .cdata_elem :
163+ if self .convert_charrefs and ( not self .cdata_elem or self . _raw_escapable ) :
158164 j = rawdata .find ('<' , i )
159165 if j < 0 :
160166 # if we can't find the next <, either we are at the end
@@ -177,7 +183,7 @@ def goahead(self, end):
177183 break
178184 j = n
179185 if i < j :
180- if self .convert_charrefs and not self .cdata_elem :
186+ if self .convert_charrefs and ( not self .cdata_elem or self . _raw_escapable ) :
181187 self .handle_data (unescape (rawdata [i :j ]))
182188 else :
183189 self .handle_data (rawdata [i :j ])
@@ -210,7 +216,7 @@ def goahead(self, end):
210216 k = i + 1
211217 else :
212218 k += 1
213- if self .convert_charrefs and not self .cdata_elem :
219+ if self .convert_charrefs and ( not self .cdata_elem or self . _raw_escapable ) :
214220 self .handle_data (unescape (rawdata [i :k ]))
215221 else :
216222 self .handle_data (rawdata [i :k ])
@@ -261,7 +267,7 @@ def goahead(self, end):
261267 assert 0 , "interesting.search() lied"
262268 # end while
263269 if end and i < n :
264- if self .convert_charrefs and not self .cdata_elem :
270+ if self .convert_charrefs and ( not self .cdata_elem or self . _raw_escapable ) :
265271 self .handle_data (unescape (rawdata [i :n ]))
266272 else :
267273 self .handle_data (rawdata [i :n ])
0 commit comments