Skip to content

Commit 46873cc

Browse files
committed
use process to uniform
1 parent 440d1e5 commit 46873cc

File tree

1 file changed

+45
-24
lines changed

1 file changed

+45
-24
lines changed

extruct/_extruct.py

Lines changed: 45 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -81,37 +81,58 @@ def extract(htmlstring,
8181
('microformat', MicroformatExtractor().extract_items,
8282
htmlstring))
8383
if 'rdfa' in syntaxes:
84-
processors.append(('rdfa', RDFaExtractor().extract_items, tree))
84+
processors.append(
85+
('rdfa', RDFaExtractor().extract_items,
86+
tree,
87+
))
8588
output = {}
86-
for label, extract, document in processors:
89+
for syntax, extract, document in processors:
8790
try:
88-
output[label] = list(extract(document, base_url=base_url))
91+
output[syntax] = list(extract(document, base_url=base_url))
8992
except Exception:
9093
if errors == 'log':
91-
logger.exception('Failed to extract {}'.format(label))
94+
logger.exception('Failed to extract {}'.format(syntax))
9295
if errors == 'ignore':
9396
pass
9497
if errors == 'strict':
9598
raise
96-
try:
97-
if uniform:
98-
if 'microdata' in syntaxes:
99-
output['microdata'] = _umicrodata_microformat(
100-
output['microdata'], schema_context=schema_context)
101-
if 'microformat' in syntaxes:
102-
output['microformat'] = _umicrodata_microformat(
103-
output['microformat'],
104-
schema_context='http://microformats.org/wiki/')
105-
if 'opengraph' in syntaxes:
106-
output['opengraph'] = _uopengraph(output['opengraph'])
107-
except Exception as e:
108-
if errors == 'ignore':
109-
return {}
110-
if errors == 'log':
111-
logger.exception(
112-
'Failed to uniform extracted, exception raised {}'.format(e))
113-
return {}
114-
if errors == 'strict':
115-
raise e
99+
if uniform:
100+
uniform_processors = []
101+
if 'microdata' in syntaxes:
102+
uniform_processors.append(
103+
('microdata',
104+
_umicrodata_microformat,
105+
output['microdata'],
106+
schema_context,
107+
))
108+
if 'microformat' in syntaxes:
109+
uniform_processors.append(
110+
('microformat',
111+
_umicrodata_microformat,
112+
output['microformat'],
113+
'http://microformats.org/wiki/',
114+
))
115+
if 'opengraph' in syntaxes:
116+
uniform_processors.append(
117+
('opengraph',
118+
_uopengraph,
119+
output['opengraph'],
120+
None,
121+
))
122+
for syntax, uniform, raw, schema_context in uniform_processors:
123+
try:
124+
if syntax == 'opengraph':
125+
output[syntax] = uniform(raw)
126+
else:
127+
output[syntax] = uniform(raw, schema_context)
128+
except Exception as e:
129+
if errors == 'ignore':
130+
output[syntax] = []
131+
if errors == 'log':
132+
output[syntax] = []
133+
logger.exception(
134+
f'Failed to uniform extracted, exception raised {e}')
135+
if errors == 'strict':
136+
raise e
116137

117138
return output

0 commit comments

Comments
 (0)