@@ -126,8 +126,8 @@ class HTMLParser(_markupbase.ParserBase):
126126 containing respectively the named or numeric reference as the
127127 argument.
128128 """
129- # For escapable raw text elements (textarea and title), CDATA mode is reused
130- CDATA_CONTENT_ELEMENTS = ("script" , "style" , "textarea" , "title" )
129+ CDATA_CONTENT_ELEMENTS = ( "script" , "style" )
130+ RCDATA_CONTENT_ELEMENTS = ("textarea" , "title" )
131131
132132 def __init__ (self , * , convert_charrefs = True ):
133133 """Initialize and reset this instance.
@@ -145,7 +145,7 @@ def reset(self):
145145 self .lasttag = '???'
146146 self .interesting = interesting_normal
147147 self .cdata_elem = None
148- self ._raw_escapable = False
148+ self ._escapable = True
149149 super ().reset ()
150150
151151 def feed (self , data ):
@@ -167,10 +167,10 @@ def get_starttag_text(self):
167167 """Return full source of start tag: '<...>'."""
168168 return self .__starttag_text
169169
170- def set_cdata_mode (self , elem ):
170+ def set_cdata_mode (self , elem , escapable = False ):
171171 self .cdata_elem = elem .lower ()
172- self ._raw_escapable = self . cdata_elem in ( "textarea" , "title" )
173- if self . _raw_escapable and not self .convert_charrefs :
172+ self ._escapable = escapable
173+ if escapable and not self .convert_charrefs :
174174 self .interesting = re .compile (r'&|</%s(?=[\t\n\r\f />])' % self .cdata_elem ,
175175 re .IGNORECASE | re .ASCII )
176176 else :
@@ -180,7 +180,7 @@ def set_cdata_mode(self, elem):
180180 def clear_cdata_mode (self ):
181181 self .interesting = interesting_normal
182182 self .cdata_elem = None
183- self ._raw_escapable = False
183+ self ._escapable = True
184184
185185 # Internal -- handle data as far as reasonable. May leave state
186186 # and data to be processed by a subsequent call. If 'end' is
@@ -213,7 +213,7 @@ def goahead(self, end):
213213 break
214214 j = n
215215 if i < j :
216- if self .convert_charrefs and ( not self .cdata_elem or self . _raw_escapable ) :
216+ if self .convert_charrefs and self ._escapable :
217217 self .handle_data (unescape (rawdata [i :j ]))
218218 else :
219219 self .handle_data (rawdata [i :j ])
@@ -315,7 +315,7 @@ def goahead(self, end):
315315 assert 0 , "interesting.search() lied"
316316 # end while
317317 if end and i < n :
318- if self .convert_charrefs and ( not self .cdata_elem or self . _raw_escapable ) :
318+ if self .convert_charrefs and self ._escapable :
319319 self .handle_data (unescape (rawdata [i :n ]))
320320 else :
321321 self .handle_data (rawdata [i :n ])
@@ -427,6 +427,8 @@ def parse_starttag(self, i):
427427 self .handle_starttag (tag , attrs )
428428 if tag in self .CDATA_CONTENT_ELEMENTS :
429429 self .set_cdata_mode (tag )
430+ elif tag in self .RCDATA_CONTENT_ELEMENTS :
431+ self .set_cdata_mode (tag , True )
430432 return endpos
431433
432434 # Internal -- check to see if we have a complete starttag; return end
0 commit comments