Skip to content

Commit 18c6ea8

Browse files
Refactoring.
1 parent 7824ee8 commit 18c6ea8

File tree

1 file changed

+11
-9
lines changed

1 file changed

+11
-9
lines changed

Lib/html/parser.py

Lines changed: 11 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -126,8 +126,8 @@ class HTMLParser(_markupbase.ParserBase):
126126
containing respectively the named or numeric reference as the
127127
argument.
128128
"""
129-
# For escapable raw text elements (textarea and title), CDATA mode is reused
130-
CDATA_CONTENT_ELEMENTS = ("script", "style", "textarea", "title")
129+
CDATA_CONTENT_ELEMENTS = ("script", "style")
130+
RCDATA_CONTENT_ELEMENTS = ("textarea", "title")
131131

132132
def __init__(self, *, convert_charrefs=True):
133133
"""Initialize and reset this instance.
@@ -145,7 +145,7 @@ def reset(self):
145145
self.lasttag = '???'
146146
self.interesting = interesting_normal
147147
self.cdata_elem = None
148-
self._raw_escapable = False
148+
self._escapable = True
149149
super().reset()
150150

151151
def feed(self, data):
@@ -167,10 +167,10 @@ def get_starttag_text(self):
167167
"""Return full source of start tag: '<...>'."""
168168
return self.__starttag_text
169169

170-
def set_cdata_mode(self, elem):
170+
def set_cdata_mode(self, elem, escapable=False):
171171
self.cdata_elem = elem.lower()
172-
self._raw_escapable = self.cdata_elem in ("textarea", "title")
173-
if self._raw_escapable and not self.convert_charrefs:
172+
self._escapable = escapable
173+
if escapable and not self.convert_charrefs:
174174
self.interesting = re.compile(r'&|</%s(?=[\t\n\r\f />])' % self.cdata_elem,
175175
re.IGNORECASE|re.ASCII)
176176
else:
@@ -180,7 +180,7 @@ def set_cdata_mode(self, elem):
180180
def clear_cdata_mode(self):
181181
self.interesting = interesting_normal
182182
self.cdata_elem = None
183-
self._raw_escapable = False
183+
self._escapable = True
184184

185185
# Internal -- handle data as far as reasonable. May leave state
186186
# and data to be processed by a subsequent call. If 'end' is
@@ -213,7 +213,7 @@ def goahead(self, end):
213213
break
214214
j = n
215215
if i < j:
216-
if self.convert_charrefs and (not self.cdata_elem or self._raw_escapable):
216+
if self.convert_charrefs and self._escapable:
217217
self.handle_data(unescape(rawdata[i:j]))
218218
else:
219219
self.handle_data(rawdata[i:j])
@@ -315,7 +315,7 @@ def goahead(self, end):
315315
assert 0, "interesting.search() lied"
316316
# end while
317317
if end and i < n:
318-
if self.convert_charrefs and (not self.cdata_elem or self._raw_escapable):
318+
if self.convert_charrefs and self._escapable:
319319
self.handle_data(unescape(rawdata[i:n]))
320320
else:
321321
self.handle_data(rawdata[i:n])
@@ -427,6 +427,8 @@ def parse_starttag(self, i):
427427
self.handle_starttag(tag, attrs)
428428
if tag in self.CDATA_CONTENT_ELEMENTS:
429429
self.set_cdata_mode(tag)
430+
elif tag in self.RCDATA_CONTENT_ELEMENTS:
431+
self.set_cdata_mode(tag, True)
430432
return endpos
431433

432434
# Internal -- check to see if we have a complete starttag; return end

0 commit comments

Comments
 (0)