@@ -81,37 +81,58 @@ def extract(htmlstring,
8181 ('microformat' , MicroformatExtractor ().extract_items ,
8282 htmlstring ))
8383 if 'rdfa' in syntaxes :
84- processors .append (('rdfa' , RDFaExtractor ().extract_items , tree ))
84+ processors .append (
85+ ('rdfa' , RDFaExtractor ().extract_items ,
86+ tree ,
87+ ))
8588 output = {}
86- for label , extract , document in processors :
89+ for syntax , extract , document in processors :
8790 try :
88- output [label ] = list (extract (document , base_url = base_url ))
91+ output [syntax ] = list (extract (document , base_url = base_url ))
8992 except Exception :
9093 if errors == 'log' :
91- logger .exception ('Failed to extract {}' .format (label ))
94+ logger .exception ('Failed to extract {}' .format (syntax ))
9295 if errors == 'ignore' :
9396 pass
9497 if errors == 'strict' :
9598 raise
96- try :
97- if uniform :
98- if 'microdata' in syntaxes :
99- output ['microdata' ] = _umicrodata_microformat (
100- output ['microdata' ], schema_context = schema_context )
101- if 'microformat' in syntaxes :
102- output ['microformat' ] = _umicrodata_microformat (
103- output ['microformat' ],
104- schema_context = 'http://microformats.org/wiki/' )
105- if 'opengraph' in syntaxes :
106- output ['opengraph' ] = _uopengraph (output ['opengraph' ])
107- except Exception as e :
108- if errors == 'ignore' :
109- return {}
110- if errors == 'log' :
111- logger .exception (
112- 'Failed to uniform extracted, exception raised {}' .format (e ))
113- return {}
114- if errors == 'strict' :
115- raise e
99+ if uniform :
100+ uniform_processors = []
101+ if 'microdata' in syntaxes :
102+ uniform_processors .append (
103+ ('microdata' ,
104+ _umicrodata_microformat ,
105+ output ['microdata' ],
106+ schema_context ,
107+ ))
108+ if 'microformat' in syntaxes :
109+ uniform_processors .append (
110+ ('microformat' ,
111+ _umicrodata_microformat ,
112+ output ['microformat' ],
113+ 'http://microformats.org/wiki/' ,
114+ ))
115+ if 'opengraph' in syntaxes :
116+ uniform_processors .append (
117+ ('opengraph' ,
118+ _uopengraph ,
119+ output ['opengraph' ],
120+ None ,
121+ ))
122+ for syntax , uniform , raw , schema_context in uniform_processors :
123+ try :
124+ if syntax == 'opengraph' :
125+ output [syntax ] = uniform (raw )
126+ else :
127+ output [syntax ] = uniform (raw , schema_context )
128+ except Exception as e :
129+ if errors == 'ignore' :
130+ output [syntax ] = []
131+ if errors == 'log' :
132+ output [syntax ] = []
133+ logger .exception (
134+ f'Failed to uniform extracted, exception raised { e } ' )
135+ if errors == 'strict' :
136+ raise e
116137
117138 return output
0 commit comments