Skip to content

Commit 4171aa2

Browse files
committed
Fix referencing with itemref
1 parent 258664f commit 4171aa2

File tree

4 files changed

+83
-80
lines changed

4 files changed

+83
-80
lines changed

extruct/w3cmicrodata.py

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
"""
1111

1212
import collections
13+
from functools import partial
14+
1315
try:
1416
from urlparse import urljoin
1517
except ImportError:
@@ -123,17 +125,19 @@ def _extract_property_refs(self, node, refid, items_seen, base_url):
123125
if not ref_node:
124126
return
125127
ref_node = ref_node[0]
126-
base_parent_scope = ref_node.xpath("ancestor-or-self::*[@itemscope][1]")
127-
if 'itemprop' in ref_node.keys():
128-
for p, v in self._extract_property(
129-
ref_node, items_seen=items_seen, base_url=base_url):
130-
yield p, v
131-
for prop in ref_node.xpath("descendant::*[@itemprop]"):
132-
parent_scope = prop.xpath("ancestor::*[@itemscope][1]")
133-
if parent_scope == base_parent_scope:
134-
for p, v in self._extract_property(
135-
prop, items_seen=items_seen, base_url=base_url):
136-
yield p, v
128+
extract_fn = partial(self._extract_property, items_seen=items_seen,
129+
base_url=base_url)
130+
if 'itemprop' in ref_node.keys() and 'itemscope' in ref_node.keys():
131+
# A full item will be extracted from the node, no need to look
132+
# for individual properties in children
133+
yield from extract_fn(ref_node)
134+
else:
135+
base_parent_scope = ref_node.xpath("ancestor-or-self::*[@itemscope][1]")
136+
for prop in ref_node.xpath("descendant-or-self::*[@itemprop]"):
137+
parent_scope = prop.xpath("ancestor::*[@itemscope][1]")
138+
# Skip properties defined in a different scope than the ref_node
139+
if parent_scope == base_parent_scope:
140+
yield from extract_fn(prop)
137141

138142
def _extract_property(self, node, items_seen, base_url):
139143
props = node.get("itemprop").split()

tests/samples/schema.org/product-ref.html

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
</head>
66
<body>
77

8-
<div id="product" itemscope itemtype="http://schema.org/Product" itemref="other-product-properties more-properties related_products">
8+
<div id="product" itemscope itemtype="http://schema.org/Product" itemref="referenced-product more-properties related_products non-existing-ref">
99
<span itemprop="brand">ACME</span>
1010
<span itemprop="name">Executive Anvil</span>
1111
<img itemprop="image" src=" anvil_executive.jpg" alt="Executive Anvil logo"/>
@@ -33,13 +33,13 @@
3333
<link itemprop="availability" href=" http://schema.org/InStock"/>In stock! Order now!
3434
</span>
3535
</div>
36-
<div id="other-product-properties" itemscope itemtype="http://schema.org/Product" itemprop="referenced_product">
37-
<span itemprop="prop2">PROP 2</span>
38-
<img itemprop="image" src="img-2.jpg">
36+
<div id="referenced-product" itemscope itemtype="http://schema.org/Product" itemprop="referenced_product">
37+
<span itemprop="name">REFERENCED PRODUCT</span>
38+
<img itemprop="image" src="img-ref.jpg">
3939
</div>
4040
<div id="more-properties" itemscope itemtype="http://schema.org/Product">
41-
<span itemprop="prop3">PROP 3</span>
42-
<img itemprop="image" src="img-3.jpg">
41+
<span itemprop="prop3">REFERENCED TO INCLUDE PROPERTIES AND ALSO INDIVIDUAL PRODUCT</span>
42+
<img itemprop="image" src="img-2.jpg">
4343
</div>
4444
<div id="related_products">
4545
<div itemscope itemtype="http://schema.org/Product" itemprop="related_products">
Lines changed: 61 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -1,71 +1,69 @@
11
[
2-
{
3-
"type": "http://schema.org/Product",
4-
"properties": {
5-
"referenced_product": {
2+
{
63
"type": "http://schema.org/Product",
74
"properties": {
8-
"prop2": "PROP 2",
9-
"image": "img-2.jpg"
10-
}
11-
},
12-
"prop2": "PROP 2",
13-
"image": [
14-
"img-2.jpg",
15-
"img-3.jpg",
16-
"anvil_executive.jpg"
17-
],
18-
"prop3": "PROP 3",
19-
"related_products": [
20-
{
21-
"type": "http://schema.org/Product",
22-
"properties": {
23-
"name": "REL PROD 1",
24-
"image": "rel-prod-1.jpg"
25-
}
26-
},
27-
{
28-
"type": "http://schema.org/Product",
29-
"properties": {
30-
"name": "REL PROD 2",
31-
"image": "rel-prod-2.jpg"
32-
}
33-
}
34-
],
35-
"brand": "ACME",
36-
"name": "Executive Anvil",
37-
"description": "Sleeker than ACME's Classic Anvil, the\n Executive Anvil is perfect for the business traveler\n looking for something to drop from a height.",
38-
"mpn": "925872",
39-
"aggregateRating": {
40-
"type": "http://schema.org/AggregateRating",
41-
"properties": {
42-
"ratingValue": "4.4",
43-
"reviewCount": "89"
5+
"referenced_product": {
6+
"type": "http://schema.org/Product",
7+
"properties": {
8+
"name": "REFERENCED PRODUCT",
9+
"image": "img-ref.jpg"
10+
}
11+
},
12+
"prop3": "REFERENCED TO INCLUDE PROPERTIES AND ALSO INDIVIDUAL PRODUCT",
13+
"image": [
14+
"img-2.jpg",
15+
"anvil_executive.jpg"
16+
],
17+
"related_products": [
18+
{
19+
"type": "http://schema.org/Product",
20+
"properties": {
21+
"name": "REL PROD 1",
22+
"image": "rel-prod-1.jpg"
23+
}
24+
},
25+
{
26+
"type": "http://schema.org/Product",
27+
"properties": {
28+
"name": "REL PROD 2",
29+
"image": "rel-prod-2.jpg"
30+
}
31+
}
32+
],
33+
"brand": "ACME",
34+
"name": "Executive Anvil",
35+
"description": "Sleeker than ACME's Classic Anvil, the\n Executive Anvil is perfect for the business traveler\n looking for something to drop from a height.",
36+
"mpn": "925872",
37+
"aggregateRating": {
38+
"type": "http://schema.org/AggregateRating",
39+
"properties": {
40+
"ratingValue": "4.4",
41+
"reviewCount": "89"
42+
}
43+
},
44+
"offers": {
45+
"type": "http://schema.org/Offer",
46+
"properties": {
47+
"priceCurrency": "USD",
48+
"price": "119.99",
49+
"priceValidUntil": "2020-11-05",
50+
"seller": {
51+
"type": "http://schema.org/Organization",
52+
"properties": {
53+
"name": "Executive Objects"
54+
}
55+
},
56+
"itemCondition": "http://schema.org/UsedCondition",
57+
"availability": "http://schema.org/InStock"
58+
}
59+
}
4460
}
45-
},
46-
"offers": {
47-
"type": "http://schema.org/Offer",
61+
},
62+
{
63+
"type": "http://schema.org/Product",
4864
"properties": {
49-
"priceCurrency": "USD",
50-
"price": "119.99",
51-
"priceValidUntil": "2020-11-05",
52-
"seller": {
53-
"type": "http://schema.org/Organization",
54-
"properties": {
55-
"name": "Executive Objects"
56-
}
57-
},
58-
"itemCondition": "http://schema.org/UsedCondition",
59-
"availability": "http://schema.org/InStock"
65+
"prop3": "REFERENCED TO INCLUDE PROPERTIES AND ALSO INDIVIDUAL PRODUCT",
66+
"image": "img-2.jpg"
6067
}
61-
}
62-
}
63-
},
64-
{
65-
"type": "http://schema.org/Product",
66-
"properties": {
67-
"prop3": "PROP 3",
68-
"image": "img-3.jpg"
6968
}
70-
}
7169
]

tests/test_microdata.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,4 +183,5 @@ def test_join_none(self):
183183

184184
mde = MicrodataExtractor()
185185
data = mde.extract(body)
186+
print(json.dumps(data, indent=4))
186187
self.assertEqual(data, expected)

0 commit comments

Comments
 (0)