@@ -150,6 +150,19 @@ def reset(self):
150150 self .beforeRCDataPhase = None
151151
152152 self .framesetOK = True
153+
def isHTMLIntegrationPoint(self, element):
    """Report whether ``element`` is an HTML integration point.

    A MathML ``annotation-xml`` element qualifies only when it carries an
    ``encoding`` attribute whose value, after ASCII lowercasing, is
    ``text/html`` or ``application/xhtml+xml``.  Any other element
    qualifies when its ``(namespace, name)`` pair is listed in
    ``htmlIntegrationPointElements``.
    """
    isMathMLAnnotation = (element.name == "annotation-xml" and
                          element.namespace == namespaces["mathml"])
    if not isMathMLAnnotation:
        return (element.namespace, element.name) in htmlIntegrationPointElements

    attributes = element.attributes
    if "encoding" not in attributes:
        return False
    # The encoding value is compared case-insensitively (ASCII only).
    encoding = attributes["encoding"].translate(asciiUpper2Lower)
    return encoding in ("text/html", "application/xhtml+xml")
163+
def isMathMLTextIntegrationPoint(self, element):
    """Report whether ``element`` is a MathML text integration point.

    The element's ``(namespace, name)`` pair is looked up in
    ``mathmlTextIntegrationPointElements``.
    """
    qualifiedName = (element.namespace, element.name)
    return qualifiedName in mathmlTextIntegrationPointElements
153166
154167 def mainLoop (self ):
155168 CharactersToken = tokenTypes ["Characters" ]
@@ -158,27 +171,48 @@ def mainLoop(self):
158171 EndTagToken = tokenTypes ["EndTag" ]
159172 CommentToken = tokenTypes ["Comment" ]
160173 DoctypeToken = tokenTypes ["Doctype" ]
161-
174+ ParseErrorToken = tokenTypes [ "ParseError" ]
162175
163176 for token in self .normalizedTokens ():
164177 new_token = token
165178 while new_token is not None :
179+ currentNode = self .tree .openElements [- 1 ] if self .tree .openElements else None
180+ currentNodeNamespace = currentNode .namespace if currentNode else None
181+ currentNodeName = currentNode .name if currentNode else None
182+
166183 type = new_token ["type" ]
167- if type == CharactersToken :
168- new_token = self .phase .processCharacters (new_token )
169- elif type == SpaceCharactersToken :
170- new_token = self .phase .processSpaceCharacters (new_token )
171- elif type == StartTagToken :
172- new_token = self .phase .processStartTag (new_token )
173- elif type == EndTagToken :
174- new_token = self .phase .processEndTag (new_token )
175- elif type == CommentToken :
176- new_token = self .phase .processComment (new_token )
177- elif type == DoctypeToken :
178- new_token = self .phase .processDoctype (new_token )
179- else :
184+
185+ if type == ParseErrorToken :
180186 self .parseError (new_token ["data" ], new_token .get ("datavars" , {}))
181187 new_token = None
188+ else :
189+ if (len (self .tree .openElements ) == 0 or
190+ currentNodeNamespace == self .tree .defaultNamespace or
191+ (self .isMathMLTextIntegrationPoint (currentNode ) and
192+ ((type == StartTagToken and
193+ token ["name" ] not in frozenset (["mglyph" , "malignmark" ])) or
194+ type in (CharactersToken , SpaceCharactersToken ))) or
195+ (currentNodeNamespace == namespaces ["mathml" ] and
196+ currentNodeName == "annotation-xml" and
197+ token ["name" ] == "svg" ) or
198+ (self .isHTMLIntegrationPoint (currentNode ) and
199+ type in (StartTagToken , CharactersToken , SpaceCharactersToken ))):
200+ phase = self .phase
201+ else :
202+ phase = self .phases ["inForeignContent" ]
203+
204+ if type == CharactersToken :
205+ new_token = phase .processCharacters (new_token )
206+ elif type == SpaceCharactersToken :
207+ new_token = phase .processSpaceCharacters (new_token )
208+ elif type == StartTagToken :
209+ new_token = phase .processStartTag (new_token )
210+ elif type == EndTagToken :
211+ new_token = phase .processEndTag (new_token )
212+ elif type == CommentToken :
213+ new_token = phase .processComment (new_token )
214+ elif type == DoctypeToken :
215+ new_token = phase .processDoctype (new_token )
182216
183217 if (type == StartTagToken and token ["selfClosing" ]
184218 and not token ["selfClosingAcknowledged" ]):
@@ -379,12 +413,12 @@ def resetInsertionMode(self):
379413 if nodeName in ("select" , "colgroup" , "head" , "html" ):
380414 assert self .innerHTML
381415
416+ if not last and node .namespace != self .tree .defaultNamespace :
417+ continue
418+
382419 if nodeName in newModes :
383420 new_phase = self .phases [newModes [nodeName ]]
384421 break
385- elif node .namespace in (namespaces ["mathml" ], namespaces ["svg" ]):
386- new_phase = self .phases ["inForeignContent" ]
387- break
388422 elif last :
389423 new_phase = self .phases ["inBody" ]
390424 break
@@ -419,7 +453,6 @@ def wrapped(self, *args, **kwargs):
419453 try :
420454 info = {"type" :type_names [token ['type' ]]}
421455 except :
422- print token
423456 raise
424457 if token ['type' ] in constants .tagTokenTypes :
425458 info ["name" ] = token ['name' ]
@@ -1243,7 +1276,6 @@ def startTagMath(self, token):
12431276 self .tree .insertElement (token )
12441277 #Need to get the parse error right for the case where the token
12451278 #has a namespace not equal to the xmlns attribute
1246- self .parser .phase = self .parser .phases ["inForeignContent" ]
12471279 if token ["selfClosing" ]:
12481280 self .tree .openElements .pop ()
12491281 token ["selfClosingAcknowledged" ] = True
@@ -1256,7 +1288,6 @@ def startTagSvg(self, token):
12561288 self .tree .insertElement (token )
12571289 #Need to get the parse error right for the case where the token
12581290 #has a namespace not equal to the xmlns attribute
1259- self .parser .phase = self .parser .phases ["inForeignContent" ]
12601291 if token ["selfClosing" ]:
12611292 self .tree .openElements .pop ()
12621293 token ["selfClosingAcknowledged" ] = True
@@ -1741,7 +1772,7 @@ def processSpaceCharacters(self, token):
17411772 self .characterTokens .append (token )
17421773 # assert False
17431774
1744- def processStartTag (self , token ):
1775+ def processStartTag (self , token ):
17451776 self .flushCharacters ()
17461777 self .parser .phase = self .originalPhase
17471778 return token
@@ -2298,7 +2329,7 @@ def endTagOther(self, token):
22982329 class InForeignContentPhase (Phase ):
22992330 breakoutElements = frozenset (["b" , "big" , "blockquote" , "body" , "br" ,
23002331 "center" , "code" , "dd" , "div" , "dl" , "dt" ,
2301- "em" , "embed" , "font" , " h1" , "h2" , "h3" ,
2332+ "em" , "embed" , "h1" , "h2" , "h3" ,
23022333 "h4" , "h5" , "h6" , "head" , "hr" , "i" , "img" ,
23032334 "li" , "listing" , "menu" , "meta" , "nobr" ,
23042335 "ol" , "p" , "pre" , "ruby" , "s" , "small" ,
@@ -2307,19 +2338,6 @@ class InForeignContentPhase(Phase):
def __init__(self, parser, tree):
    """Initialise the phase by delegating to ``Phase.__init__``.

    ``parser`` and ``tree`` are passed through unchanged; this phase adds
    no state of its own here.
    """
    Phase.__init__(self, parser, tree)
23092340
2310- def isHTMLIntegrationPoint (self , element ):
2311- if (element .name == "annotation-xml" and
2312- element .namespace == namespaces ["mathml" ]):
2313- return ("encoding" in element .attributes and
2314- element .attributes ["encoding" ].translate (
2315- asciiUpper2Lower ) in
2316- ("text/html" , "application/xhtml+xml" ))
2317- else :
2318- return (element .namespace , element .name ) in htmlIntegrationPointElements
2319-
2320- def isMathMLTextIntegrationPoint (self , element ):
2321- return (element .namespace , element .name ) in mathmlTextIntegrationPointElements
2322-
23232341 def adjustSVGTagNames (self , token ):
23242342 replacements = {u"altglyph" :u"altGlyph" ,
23252343 u"altglyphdef" :u"altGlyphDef" ,
@@ -2362,48 +2380,25 @@ def adjustSVGTagNames(self, token):
23622380 token ["name" ] = replacements [token ["name" ]]
23632381
23642382 def processCharacters (self , token ):
2365- if (self .tree .openElements [- 1 ].namespace == self .tree .defaultNamespace or
2366- self .isHTMLIntegrationPoint (self .tree .openElements [- 1 ])):
2367- new_token = self .parser .phases ["inBody" ].processCharacters (token )
2368- self .parser .resetInsertionMode ()
2369- return new_token
2370- elif token ["data" ] == u"\u0000 " :
2383+ if token ["data" ] == u"\u0000 " :
23712384 token ["data" ] = u"\uFFFD "
23722385 elif (self .parser .framesetOK and
23732386 any (char not in spaceCharacters for char in token ["data" ])):
23742387 self .parser .framesetOK = False
23752388 Phase .processCharacters (self , token )
23762389
2377- def processEOF (self ):
2378- reprocess = self .parser .phases ["inBody" ].processEOF ()
2379- self .parser .resetInsertionMode ()
2380- return reprocess
2381-
23822390 def processStartTag (self , token ):
23832391 currentNode = self .tree .openElements [- 1 ]
2384- currentNodeNamespace = currentNode .namespace
2385- currentNodeName = currentNode .name
2386- if (currentNodeNamespace == self .tree .defaultNamespace or
2387- (self .isMathMLTextIntegrationPoint (currentNode ) and
2388- token ["name" ] not in frozenset (["mglyph" , "malignmark" ])) or
2389- (currentNodeNamespace == namespaces ["mathml" ] and
2390- currentNodeName == "annotation-xml" and
2391- token ["name" ] == "svg" ) or
2392- self .isHTMLIntegrationPoint (currentNode )):
2393-
2394- new_token = self .parser .phases ["inBody" ].processStartTag (token )
2395- self .parser .resetInsertionMode ()
2396- return new_token
2397-
2398- elif token ["name" ] in self .breakoutElements :
2392+ if (token ["name" ] in self .breakoutElements or
2393+ (token ["name" ] == "font" and
2394+ set (token ["data" ].keys ()) | set ("color" , "face" , "size" ))):
23992395 self .parser .parseError ("unexpected-html-element-in-foreign-content" ,
24002396 token ["name" ])
24012397 while (self .tree .openElements [- 1 ].namespace !=
24022398 self .tree .defaultNamespace and
2403- not self .isHTMLIntegrationPoint (self .tree .openElements [- 1 ]) and
2404- not self .isMathMLTextIntegrationPoint (self .tree .openElements [- 1 ])):
2399+ not self .parser . isHTMLIntegrationPoint (self .tree .openElements [- 1 ]) and
2400+ not self .parser . isMathMLTextIntegrationPoint (self .tree .openElements [- 1 ])):
24052401 self .tree .openElements .pop ()
2406- self .parser .resetInsertionMode ()
24072402 return token
24082403
24092404 else :
@@ -2420,33 +2415,29 @@ def processStartTag(self, token):
24202415 token ["selfClosingAcknowledged" ] = True
24212416
def processEndTag(self, token):
    """Handle an end tag token while in foreign content.

    The stack of open elements is walked from the current node downwards,
    looking for an element whose ASCII-lowercased name equals
    ``token["name"]``.

    * If such an element is found before an element in the default
      namespace is reached, the stack is popped up to and including that
      element and ``None`` is returned.
    * If an element in the default namespace is reached first, the token
      is passed to ``self.parser.phase.processEndTag`` and the result of
      that call is returned.

    An ``unexpected-end-tag`` parse error is recorded when the current
    node does not match the token.
    """
    nodeIndex = len(self.tree.openElements) - 1
    node = self.tree.openElements[-1]

    # Use the same case-insensitive comparison as the matching loop below.
    # A case-sensitive check would report a spurious error for elements
    # whose names were re-cased by adjustSVGTagNames (e.g. "altGlyph")
    # even though the loop goes on to match them.
    if node.name.translate(asciiUpper2Lower) != token["name"]:
        # parseError's second argument is a datavars mapping, not a bare
        # string (mainLoop passes token.get("datavars", {}) in that slot).
        self.parser.parseError("unexpected-end-tag",
                               {"name": token["name"]})

    while True:
        if node.name.translate(asciiUpper2Lower) == token["name"]:
            # If table text is still being collected, flush it and restore
            # the phase that was active before, so the pending characters
            # are handled before the elements are popped.
            if self.parser.phase == self.parser.phases["inTableText"]:
                self.parser.phase.flushCharacters()
                self.parser.phase = self.parser.phase.originalPhase
            # Pop everything above the matched element, then the element
            # itself.
            while self.tree.openElements.pop() != node:
                assert self.tree.openElements
            return None

        nodeIndex -= 1
        node = self.tree.openElements[nodeIndex]
        if node.namespace == self.tree.defaultNamespace:
            # Reached an element in the default namespace: let the
            # parser's current phase deal with the end tag.
            return self.parser.phase.processEndTag(token)
24502441
24512442
24522443 class AfterBodyPhase (Phase ):
0 commit comments