Skip to content

Commit dac5f83

Browse files
committed
support errors = log
1 parent 25a832d commit dac5f83

File tree

1 file changed

+45
-30
lines changed

1 file changed

+45
-30
lines changed

extruct/_extruct.py

Lines changed: 45 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -56,35 +56,44 @@ def extract(htmlstring,
5656
', "ignore" or "strict"')
5757
try:
5858
tree = parse_xmldom_html(htmlstring, encoding=encoding)
59-
processors = []
60-
if 'microdata' in syntaxes:
61-
processors.append(
62-
('microdata', MicrodataExtractor(
63-
add_html_node=return_html_node).extract_items, tree))
64-
if 'json-ld' in syntaxes:
65-
processors.append(('json-ld', JsonLdExtractor().extract_items,
66-
tree))
67-
if 'opengraph' in syntaxes:
68-
processors.append(('opengraph', OpenGraphExtractor().extract_items,
69-
tree))
70-
if 'microformat' in syntaxes:
71-
processors.append(
72-
('microformat', MicroformatExtractor().extract_items,
73-
htmlstring))
74-
if 'rdfa' in syntaxes:
75-
processors.append(('rdfa', RDFaExtractor().extract_items, tree))
76-
output = {}
77-
for label, extract, document in processors:
78-
try:
79-
output[label] = list(extract(document, base_url=base_url))
80-
except Exception:
81-
if errors == 'log':
82-
logger.exception('Failed to extract {}'.format(label))
83-
if errors == 'ignore':
84-
pass
85-
if errors == 'strict':
86-
raise
87-
59+
except Exception as e:
60+
if errors == 'ignore':
61+
return {}
62+
if errors == 'log':
63+
logger.exception(
64+
'Failed to parse html, exception raised {}'.format(e))
65+
return {}
66+
if errors == 'strict':
67+
raise e
68+
processors = []
69+
if 'microdata' in syntaxes:
70+
processors.append(
71+
('microdata', MicrodataExtractor(
72+
add_html_node=return_html_node).extract_items, tree))
73+
if 'json-ld' in syntaxes:
74+
processors.append(('json-ld', JsonLdExtractor().extract_items,
75+
tree))
76+
if 'opengraph' in syntaxes:
77+
processors.append(('opengraph', OpenGraphExtractor().extract_items,
78+
tree))
79+
if 'microformat' in syntaxes:
80+
processors.append(
81+
('microformat', MicroformatExtractor().extract_items,
82+
htmlstring))
83+
if 'rdfa' in syntaxes:
84+
processors.append(('rdfa', RDFaExtractor().extract_items, tree))
85+
output = {}
86+
for label, extract, document in processors:
87+
try:
88+
output[label] = list(extract(document, base_url=base_url))
89+
except Exception:
90+
if errors == 'log':
91+
logger.exception('Failed to extract {}'.format(label))
92+
if errors == 'ignore':
93+
pass
94+
if errors == 'strict':
95+
raise
96+
try:
8897
if uniform:
8998
if 'microdata' in syntaxes:
9099
output['microdata'] = _umicrodata_microformat(
@@ -98,5 +107,11 @@ def extract(htmlstring,
98107
except Exception as e:
99108
if errors == 'ignore':
100109
return {}
101-
raise e
110+
if errors == 'log':
111+
logger.exception(
112+
'Failed to uniform extracted, exception raised {}'.format(e))
113+
return {}
114+
if errors == 'strict':
115+
raise e
116+
102117
return output

0 commit comments

Comments
 (0)