From 6bf62826ec19012efa7e83dd93b46d887219a7f7 Mon Sep 17 00:00:00 2001 From: William Schaub Date: Fri, 6 Jun 2014 20:11:46 -0500 Subject: [PATCH 1/2] add sitemapindex generator add the generate_sitemapindex function which takes a list of dicts containing a 'loc' key and optionally a lastmod key (containing a datetime.date or datetime.datetime object) and creates a sitemapindex file. --- sitemap/sitemapindex.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/sitemap/sitemapindex.py b/sitemap/sitemapindex.py index 644e54d..c00012b 100644 --- a/sitemap/sitemapindex.py +++ b/sitemap/sitemapindex.py @@ -1,4 +1,4 @@ - +import sys from lxml import etree from urllib import urlopen from cStringIO import StringIO @@ -7,6 +7,22 @@ from urlset import * from exceptions import * +def generate_sitemapindex(sitemaps,out=sys.stdout): + sitemapindex = etree.Element("sitemapindex",xmlns="http://www.sitemaps.org/schemas/sitemap/0.9") + for map in sitemaps: + sitemap = etree.Element("sitemap") + loc = etree.Element("loc") + lastmod = etree.Element("lastmod") + loc.text = map['loc'] + sitemap.append(loc) + if map.has_key('lastmod'): + lastmod.text = map['lastmod'].isoformat() + sitemap.append(lastmod) + sitemapindex.append(sitemap) + out.write(etree.tostring(sitemapindex,xml_declaration=True,pretty_print=True,encoding="UTF-8")) + + + class SitemapIndex(object): @staticmethod From 74a350ceb8d289e099ef19ffccbc52ed4ff05a4d Mon Sep 17 00:00:00 2001 From: William Schaub Date: Thu, 31 Jul 2014 20:33:46 -0500 Subject: [PATCH 2/2] iso8601: FixedOffset objects can not be unpickled This patch replaces the __init__ of FixedOffset with a set method instead. This is so we can unpickle datetime objects using this tzinfo based class as a tzinfo= argument. 
You can read all about it at https://docs.python.org/2/library/datetime.html#tzinfo-objects I ran into this while trying to unpickle a pickled dictionary filled with UrlSetElement objects that I had read in from the sitemap library, which had a lastmod set in the parsed sitemap. You get a nasty traceback complaining about __init__ requiring 4 arguments but only one given, without much useful info on what inside the pickle file caused it. --- sitemap/iso8601.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sitemap/iso8601.py b/sitemap/iso8601.py index a51be3e..aac7262 100755 --- a/sitemap/iso8601.py +++ b/sitemap/iso8601.py @@ -43,7 +43,7 @@ class FixedOffset(tzinfo): """Fixed offset in hours and minutes from UTC """ - def __init__(self, offset_hours, offset_minutes, name): + def set(self, offset_hours, offset_minutes, name): self.__offset = timedelta(hours=offset_hours, minutes=offset_minutes) self.__name = name @@ -76,7 +76,9 @@ def parse_timezone(tzstring, default_timezone=UTC): if prefix == "-": hours = -hours minutes = -minutes - return FixedOffset(hours, minutes, tzstring) + offset = FixedOffset() + offset.set(hours, minutes, tzstring) + return offset def parse_date(datestring, default_timezone=UTC): """Parses ISO 8601 dates into datetime objects